
reorganize moving non-submodule packages into src/main/scala

Howard Mao
2016-08-19 10:58:56 -07:00
parent f78da0b0ea
commit 7b20609d4d
110 changed files with 3 additions and 381 deletions


@@ -0,0 +1,381 @@
// See LICENSE for license details.
package coreplex
import Chisel._
import junctions._
import uncore.tilelink._
import uncore.coherence._
import uncore.agents._
import uncore.devices._
import uncore.converters._
import rocket._
import rocket.Util._
import scala.math.max
import scala.collection.mutable.{LinkedHashSet, ListBuffer}
import DefaultTestSuites._
import cde.{Parameters, Config, Dump, Knob, CDEMatchError}
object ConfigUtils {
def max_int(values: Int*): Int = {
values.reduce((a, b) => max(a, b))
}
}
import ConfigUtils._
class BaseCoreplexConfig extends Config (
topDefinitions = { (pname,site,here) =>
type PF = PartialFunction[Any,Any]
def findBy(sname:Any):Any = here[PF](site[Any](sname))(pname)
lazy val innerDataBits = 64
lazy val innerDataBeats = (8 * site(CacheBlockBytes)) / innerDataBits
pname match {
//Memory Parameters
case PAddrBits => 32
case PgLevels => if (site(XLen) == 64) 3 /* Sv39 */ else 2 /* Sv32 */
case ASIdBits => 7
//Params used by all caches
case NSets => findBy(CacheName)
case NWays => findBy(CacheName)
case RowBits => findBy(CacheName)
case NTLBEntries => findBy(CacheName)
case CacheIdBits => findBy(CacheName)
case SplitMetadata => findBy(CacheName)
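//e.g. under CacheName = "L1I", a site(NSets) query falls through findBy to
//the "L1I" partial function below and yields Knob("L1I_SETS")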
case "L1I" => {
case NSets => Knob("L1I_SETS") //64
case NWays => Knob("L1I_WAYS") //4
case RowBits => site(TLKey("L1toL2")).dataBitsPerBeat
case NTLBEntries => 8
case CacheIdBits => 0
case SplitMetadata => false
}:PF
case "L1D" => {
case NSets => Knob("L1D_SETS") //64
case NWays => Knob("L1D_WAYS") //4
case RowBits => site(TLKey("L1toL2")).dataBitsPerBeat
case NTLBEntries => 8
case CacheIdBits => 0
case SplitMetadata => false
}:PF
case ECCCode => None
case Replacer => () => new RandomReplacement(site(NWays))
//L1InstCache
case BtbKey => BtbParameters()
//L1DataCache
case DCacheKey => DCacheConfig(nMSHRs = site(Knob("L1D_MSHRS")))
//L2 Memory System Params
case AmoAluOperandBits => site(XLen)
case NAcquireTransactors => 7
case L2StoreDataQueueDepth => 1
case L2DirectoryRepresentation => new NullRepresentation(site(NTiles))
case BuildL2CoherenceManager => (id: Int, p: Parameters) =>
Module(new L2BroadcastHub()(p.alterPartial({
case InnerTLId => "L1toL2"
case OuterTLId => "L2toMC" })))
case NCachedTileLinkPorts => 1
case NUncachedTileLinkPorts => 1
//Tile Constants
case BuildTiles => {
val env = if(site(UseVM)) List("p","v") else List("p")
site(FPUKey) foreach { case cfg =>
TestGeneration.addSuite(rv32udBenchmarks)
TestGeneration.addSuites(env.map(rv64ufNoDiv))
TestGeneration.addSuites(env.map(rv64udNoDiv))
if (cfg.divSqrt) {
TestGeneration.addSuites(env.map(rv64uf))
TestGeneration.addSuites(env.map(rv64ud))
}
}
if (site(UseAtomics)) TestGeneration.addSuites(env.map(if (site(XLen) == 64) rv64ua else rv32ua))
if (site(UseCompressed)) TestGeneration.addSuites(env.map(if (site(XLen) == 64) rv64uc else rv32uc))
val (rvi, rvu) =
if (site(XLen) == 64) ((if (site(UseVM)) rv64i else rv64pi), rv64u)
else ((if (site(UseVM)) rv32i else rv32pi), rv32u)
TestGeneration.addSuites(rvi.map(_("p")))
TestGeneration.addSuites((if(site(UseVM)) List("v") else List()).flatMap(env => rvu.map(_(env))))
TestGeneration.addSuite(if (site(UseVM)) benchmarks else emptyBmarks)
List.fill(site(NTiles)){ (r: Bool, p: Parameters) =>
Module(new RocketTile(resetSignal = r)(p.alterPartial({
case TLId => "L1toL2"
case NUncachedTileLinkPorts => 1 + site(RoccNMemChannels)
})))
}
}
case BuildRoCC => Nil
case RoccNMemChannels => site(BuildRoCC).map(_.nMemChannels).foldLeft(0)(_ + _)
case RoccNPTWPorts => site(BuildRoCC).map(_.nPTWPorts).foldLeft(0)(_ + _)
//Rocket Core Constants
case CoreInstBits => if (site(UseCompressed)) 16 else 32
case FetchWidth => if (site(UseCompressed)) 2 else 1
case RetireWidth => 1
case UseVM => true
case UseUser => true
case UseDebug => true
case NBreakpoints => 1
case FastLoadWord => true
case FastLoadByte => false
case XLen => 64
case FPUKey => Some(FPUConfig())
case MulDivKey => Some(MulDivConfig())
case UseAtomics => true
case UseCompressed => true
case PLICKey => PLICConfig(site(NTiles), site(UseVM), site(NExtInterrupts), 0)
case DMKey => new DefaultDebugModuleConfig(site(NTiles), site(XLen))
case NCustomMRWCSRs => 0
case ResetVector => BigInt(0x1000)
case MtvecInit => BigInt(0x1010)
case MtvecWritable => true
//Uncore Parameters
case RTCPeriod => 100 // gives 10 MHz RTC assuming 1 GHz uncore clock
case LNEndpoints => site(TLKey(site(TLId))).nManagers + site(TLKey(site(TLId))).nClients
case LNHeaderBits => log2Ceil(site(TLKey(site(TLId))).nManagers) +
log2Up(site(TLKey(site(TLId))).nClients)
case TLKey("L1toL2") => {
val useMEI = site(NTiles) <= 1 && site(NCachedTileLinkPorts) <= 1
TileLinkParameters(
coherencePolicy = (
if (useMEI) new MEICoherence(site(L2DirectoryRepresentation))
else new MESICoherence(site(L2DirectoryRepresentation))),
nManagers = site(NBanksPerMemoryChannel)*site(NMemoryChannels) + 1 /* MMIO */,
nCachingClients = site(NCachedTileLinkPorts),
nCachelessClients = site(NExternalClients) + site(NUncachedTileLinkPorts),
maxClientXacts = max_int(
// L1 cache
site(DCacheKey).nMSHRs + 1 /* IOMSHR */,
// RoCC
if (site(BuildRoCC).isEmpty) 1 else site(RoccMaxTaggedMemXacts)),
maxClientsPerPort = if (site(BuildRoCC).isEmpty) 1 else 2,
maxManagerXacts = site(NAcquireTransactors) + 2,
dataBeats = innerDataBeats,
dataBits = site(CacheBlockBytes)*8)
}
case TLKey("L2toMC") =>
TileLinkParameters(
coherencePolicy = new MEICoherence(
new NullRepresentation(site(NBanksPerMemoryChannel))),
nManagers = 1,
nCachingClients = site(NBanksPerMemoryChannel),
nCachelessClients = 0,
maxClientXacts = 1,
maxClientsPerPort = site(NAcquireTransactors) + 2,
maxManagerXacts = 1,
dataBeats = innerDataBeats,
dataBits = site(CacheBlockBytes)*8)
case TLKey("Outermost") => site(TLKey("L2toMC")).copy(
maxClientXacts = site(NAcquireTransactors) + 2,
maxClientsPerPort = site(NBanksPerMemoryChannel),
dataBeats = site(MIFDataBeats))
case TLKey("L2toMMIO") => {
TileLinkParameters(
coherencePolicy = new MICoherence(
new NullRepresentation(site(NBanksPerMemoryChannel))),
nManagers = site(GlobalAddrMap).subMap("io").numSlaves,
nCachingClients = 0,
nCachelessClients = 1,
maxClientXacts = 4,
maxClientsPerPort = 1,
maxManagerXacts = 1,
dataBeats = innerDataBeats,
dataBits = site(CacheBlockBytes) * 8)
}
case TLKey("MMIO_Outermost") => site(TLKey("L2toMMIO")).copy(dataBeats = site(MIFDataBeats))
case BootROMFile => "./bootrom/bootrom.img"
case NTiles => Knob("NTILES")
case NBanksPerMemoryChannel => Knob("NBANKS_PER_MEM_CHANNEL")
case BankIdLSB => 0
case CacheBlockBytes => Dump("CACHE_BLOCK_BYTES", 64)
case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes))
case EnableL2Logging => false
case ExtraCoreplexPorts => (p: Parameters) => new Bundle
case RegressionTestNames => LinkedHashSet(
"rv64ud-v-fcvt",
"rv64ud-p-fdiv",
"rv64ud-v-fadd",
"rv64uf-v-fadd",
"rv64um-v-mul",
"rv64mi-p-breakpoint",
"rv64uc-v-rvc",
"rv64ud-v-structural",
"rv64si-p-wfi",
"rv64um-v-divw",
"rv64ua-v-lrsc",
"rv64ui-v-fence_i",
"rv64ud-v-fcvt_w",
"rv64uf-v-fmin",
"rv64ui-v-sb",
"rv64ua-v-amomax_d",
"rv64ud-v-move",
"rv64ud-v-fclass",
"rv64ua-v-amoand_d",
"rv64ua-v-amoxor_d",
"rv64si-p-sbreak",
"rv64ud-v-fmadd",
"rv64uf-v-ldst",
"rv64um-v-mulh",
"rv64si-p-dirty")
case _ => throw new CDEMatchError
}},
knobValues = {
case "NTILES" => 1
case "NBANKS_PER_MEM_CHANNEL" => 1
case "L1D_MSHRS" => 2
case "L1D_SETS" => 64
case "L1D_WAYS" => 4
case "L1I_SETS" => 64
case "L1I_WAYS" => 4
case _ => throw new CDEMatchError
}
)
class WithNCores(n: Int) extends Config(
knobValues = { case"NTILES" => n; case _ => throw new CDEMatchError })
class WithNBanksPerMemChannel(n: Int) extends Config(
knobValues = {
case "NBANKS_PER_MEM_CHANNEL" => n
case _ => throw new CDEMatchError
})
class WithL2Cache extends Config(
(pname,site,here) => pname match {
case "L2_CAPACITY_IN_KB" => Knob("L2_CAPACITY_IN_KB")
case "L2Bank" => {
case NSets => (((here[Int]("L2_CAPACITY_IN_KB")*1024) /
site(CacheBlockBytes)) /
(site(NBanksPerMemoryChannel)*site(NMemoryChannels))) /
site(NWays)
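// e.g. with the default knobs below, 64-byte blocks, and (assuming) a single
// memory channel: (2048 * 1024) / 64 / (1 * 1) / 8 ways = 4096 sets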
case NWays => Knob("L2_WAYS")
case RowBits => site(TLKey(site(TLId))).dataBitsPerBeat
case CacheIdBits => log2Ceil(site(NMemoryChannels) * site(NBanksPerMemoryChannel))
case SplitMetadata => Knob("L2_SPLIT_METADATA")
}: PartialFunction[Any,Any]
case NAcquireTransactors => 2
case NSecondaryMisses => 4
case L2DirectoryRepresentation => new FullRepresentation(site(NTiles))
case BuildL2CoherenceManager => (id: Int, p: Parameters) =>
Module(new L2HellaCacheBank()(p.alterPartial({
case CacheId => id
case CacheName => "L2Bank"
case InnerTLId => "L1toL2"
case OuterTLId => "L2toMC"})))
case L2Replacer => () => new SeqRandom(site(NWays))
case _ => throw new CDEMatchError
},
knobValues = { case "L2_WAYS" => 8; case "L2_CAPACITY_IN_KB" => 2048; case "L2_SPLIT_METADATA" => false; case _ => throw new CDEMatchError }
)
class WithBufferlessBroadcastHub extends Config(
(pname, site, here) => pname match {
case BuildL2CoherenceManager => (id: Int, p: Parameters) =>
Module(new BufferlessBroadcastHub()(p.alterPartial({
case InnerTLId => "L1toL2"
case OuterTLId => "L2toMC" })))
case _ => throw new CDEMatchError
})
/**
* WARNING!!! IGNORE AT YOUR OWN PERIL!!!
*
* There is a very restrictive set of conditions under which the stateless
* bridge will function properly. There can only be a single tile. This tile
* MUST use the blocking data cache (L1D_MSHRS == 0) and MUST NOT have an
* uncached channel capable of writes (i.e. a RoCC accelerator).
*
* This is because the stateless bridge CANNOT generate probes, so if your
* system depends on coherence between channels in any way,
* DO NOT use this configuration.
*/
class WithStatelessBridge extends Config (
topDefinitions = (pname, site, here) => pname match {
case BuildL2CoherenceManager => (id: Int, p: Parameters) =>
Module(new ManagerToClientStatelessBridge()(p.alterPartial({
case InnerTLId => "L1toL2"
case OuterTLId => "L2toMC" })))
},
knobValues = {
case "L1D_MSHRS" => 0
case _ => throw new CDEMatchError
}
)
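/**
 * A hypothetical composition (illustrative only, not part of this commit)
 * that satisfies the conditions above: one tile, a blocking L1 D$ (the
 * L1D_MSHRS knob is already forced to 0 here), and no RoCC accelerator
 * (BaseCoreplexConfig sets BuildRoCC to Nil). Entries on the left of ++
 * take priority; a real top level would stack further platform fragments.
 */
class WithStatelessBridgeExampleConfig extends Config(
  new WithStatelessBridge ++ new WithNCores(1) ++ new BaseCoreplexConfig)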
class WithPLRU extends Config(
(pname, site, here) => pname match {
case L2Replacer => () => new SeqPLRU(site(NSets), site(NWays))
case _ => throw new CDEMatchError
})
class WithL2Capacity(size_kb: Int) extends Config(
knobValues = {
case "L2_CAPACITY_IN_KB" => size_kb
case _ => throw new CDEMatchError
})
class WithNL2Ways(n: Int) extends Config(
knobValues = {
case "L2_WAYS" => n
case _ => throw new CDEMatchError
})
class WithRV32 extends Config(
(pname,site,here) => pname match {
case XLen => 32
case UseVM => false
case UseUser => false
case UseAtomics => false
case FPUKey => None
case RegressionTestNames => LinkedHashSet(
"rv32mi-p-ma_addr",
"rv32mi-p-csr",
"rv32ui-p-sh",
"rv32ui-p-lh",
"rv32mi-p-sbreak",
"rv32ui-p-sll")
case _ => throw new CDEMatchError
}
)
class WithBlockingL1 extends Config (
knobValues = {
case "L1D_MSHRS" => 0
case _ => throw new CDEMatchError
}
)
class WithSmallCores extends Config (
topDefinitions = { (pname,site,here) => pname match {
case FPUKey => None
case NTLBEntries => 4
case BtbKey => BtbParameters(nEntries = 0)
case NAcquireTransactors => 2
case _ => throw new CDEMatchError
}},
knobValues = {
case "L1D_SETS" => 64
case "L1D_WAYS" => 1
case "L1I_SETS" => 64
case "L1I_WAYS" => 1
case "L1D_MSHRS" => 0
case _ => throw new CDEMatchError
}
)
class WithRoccExample extends Config(
(pname, site, here) => pname match {
case BuildRoCC => Seq(
RoccParameters(
opcodes = OpcodeSet.custom0,
generator = (p: Parameters) => Module(new AccumulatorExample()(p))),
RoccParameters(
opcodes = OpcodeSet.custom1,
generator = (p: Parameters) => Module(new TranslatorExample()(p)),
nPTWPorts = 1),
RoccParameters(
opcodes = OpcodeSet.custom2,
generator = (p: Parameters) => Module(new CharacterCountExample()(p))))
case RoccMaxTaggedMemXacts => 1
case _ => throw new CDEMatchError
})
class WithSplitL2Metadata extends Config(
knobValues = { case "L2_SPLIT_METADATA" => true; case _ => throw new CDEMatchError })


@@ -0,0 +1,283 @@
package coreplex
import Chisel._
import cde.{Parameters, Field}
import junctions._
import uncore.tilelink._
import uncore.coherence._
import uncore.agents._
import uncore.devices._
import uncore.util._
import uncore.converters._
import rocket._
import rocket.Util._
import java.nio.{ByteBuffer,ByteOrder}
import java.nio.file.{Files, Paths}
/** Number of memory channels */
case object NMemoryChannels extends Field[Int]
/** Number of banks per memory channel */
case object NBanksPerMemoryChannel extends Field[Int]
/** Least significant bit of address used for bank partitioning */
case object BankIdLSB extends Field[Int]
/** Function for building some kind of coherence manager agent */
case object BuildL2CoherenceManager extends Field[(Int, Parameters) => CoherenceAgent]
/** Function for building some kind of tile connected to a reset signal */
case object BuildTiles extends Field[Seq[(Bool, Parameters) => Tile]]
/** A string describing on-chip devices, readable by target software */
case object ConfigString extends Field[Array[Byte]]
/** Number of external interrupt sources */
case object NExtInterrupts extends Field[Int]
/** Interrupt controller configuration */
case object PLICKey extends Field[PLICConfig]
/** Number of clock cycles per RTC tick */
case object RTCPeriod extends Field[Int]
/** The file to read the BootROM contents from */
case object BootROMFile extends Field[String]
/** Export an external MMIO slave port */
case object ExportMMIOPort extends Field[Boolean]
/** Expose additional TileLink client ports */
case object NExternalClients extends Field[Int]
/** Extra top-level ports exported from the coreplex */
case object ExtraCoreplexPorts extends Field[Parameters => Bundle]
trait HasCoreplexParameters {
implicit val p: Parameters
lazy val nTiles = p(NTiles)
lazy val nCachedTilePorts = p(NCachedTileLinkPorts)
lazy val nUncachedTilePorts = p(NUncachedTileLinkPorts)
lazy val nMemChannels = p(NMemoryChannels)
lazy val nBanksPerMemChannel = p(NBanksPerMemoryChannel)
lazy val nBanks = nMemChannels*nBanksPerMemChannel
lazy val lsb = p(BankIdLSB)
lazy val innerParams = p.alterPartial({ case TLId => "L1toL2" })
lazy val outermostParams = p.alterPartial({ case TLId => "Outermost" })
lazy val outermostMMIOParams = p.alterPartial({ case TLId => "MMIO_Outermost" })
lazy val nExtClients = p(NExternalClients)
lazy val exportMMIO = p(ExportMMIOPort)
}
/** Wrapper around everything that isn't a Tile.
*
* Usually this is clocked and/or place-and-routed separately from the Tiles.
*/
class Uncore(implicit val p: Parameters) extends Module
with HasCoreplexParameters {
val io = new Bundle {
val mem = Vec(nMemChannels, new ClientUncachedTileLinkIO()(outermostParams))
val tiles_cached = Vec(nCachedTilePorts, new ClientTileLinkIO).flip
val tiles_uncached = Vec(nUncachedTilePorts, new ClientUncachedTileLinkIO).flip
val ext_uncached = Vec(nExtClients, new ClientUncachedTileLinkIO()(innerParams)).flip
val prci = Vec(nTiles, new PRCITileIO).asOutput
val mmio = if (exportMMIO) Some(new ClientUncachedTileLinkIO()(outermostMMIOParams)) else None
val interrupts = Vec(p(NExtInterrupts), Bool()).asInput
val debug = new DebugBusIO()(p).flip
}
val outmemsys = if (nCachedTilePorts + nUncachedTilePorts > 0)
Module(new DefaultOuterMemorySystem) // NoC, LLC and SerDes
else Module(new DummyOuterMemorySystem)
outmemsys.io.incoherent foreach (_ := Bool(false))
outmemsys.io.tiles_uncached <> io.tiles_uncached
outmemsys.io.tiles_cached <> io.tiles_cached
outmemsys.io.ext_uncached <> io.ext_uncached
io.mem <> outmemsys.io.mem
buildMMIONetwork(p.alterPartial({case TLId => "L2toMMIO"}))
def makeBootROM()(implicit p: Parameters) = {
val romdata = Files.readAllBytes(Paths.get(p(BootROMFile)))
val rom = ByteBuffer.wrap(romdata)
rom.order(ByteOrder.LITTLE_ENDIAN)
// for now, have the reset vector jump straight to memory
val resetToMemDist = p(GlobalAddrMap)("mem").start - p(ResetVector)
require(resetToMemDist == (resetToMemDist.toInt >> 12 << 12))
val configStringAddr = p(ResetVector).toInt + rom.capacity
require(rom.getInt(12) == 0,
"Config string address position should not be occupied by code")
rom.putInt(12, configStringAddr)
rom.array() ++ p(ConfigString).toSeq
}
def buildMMIONetwork(implicit p: Parameters) = {
val ioAddrMap = p(GlobalAddrMap).subMap("io")
val mmioNetwork = Module(new TileLinkRecursiveInterconnect(1, ioAddrMap))
mmioNetwork.io.in.head <> outmemsys.io.mmio
val plic = Module(new PLIC(p(PLICKey)))
plic.io.tl <> mmioNetwork.port("int:plic")
for (i <- 0 until io.interrupts.size) {
val gateway = Module(new LevelGateway)
gateway.io.interrupt := io.interrupts(i)
plic.io.devices(i) <> gateway.io.plic
}
val debugModule = Module(new DebugModule)
debugModule.io.tl <> mmioNetwork.port("int:debug")
debugModule.io.db <> io.debug
val prci = Module(new PRCI)
prci.io.tl <> mmioNetwork.port("int:prci")
io.prci := prci.io.tiles
prci.io.rtcTick := Counter(p(RTCPeriod)).inc() // placeholder for real RTC
for (i <- 0 until nTiles) {
prci.io.interrupts(i).meip := plic.io.harts(plic.cfg.context(i, 'M'))
if (p(UseVM))
prci.io.interrupts(i).seip := plic.io.harts(plic.cfg.context(i, 'S'))
prci.io.interrupts(i).debug := debugModule.io.debugInterrupts(i)
io.prci(i).reset := reset
}
val bootROM = Module(new ROMSlave(makeBootROM()))
bootROM.io <> mmioNetwork.port("int:bootrom")
io.mmio.map { ext => ext <> mmioNetwork.port("ext") }
}
}
abstract class OuterMemorySystem(implicit val p: Parameters)
extends Module with HasCoreplexParameters {
val io = new Bundle {
val tiles_cached = Vec(nCachedTilePorts, new ClientTileLinkIO).flip
val tiles_uncached = Vec(nUncachedTilePorts, new ClientUncachedTileLinkIO).flip
val ext_uncached = Vec(nExtClients, new ClientUncachedTileLinkIO()(innerParams)).flip
val incoherent = Vec(nCachedTilePorts, Bool()).asInput
val mem = Vec(nMemChannels, new ClientUncachedTileLinkIO()(outermostParams))
val mmio = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => "L2toMMIO"}))
}
}
/** Use in place of OuterMemorySystem if there are no clients to connect. */
class DummyOuterMemorySystem(implicit p: Parameters) extends OuterMemorySystem()(p) {
require(nCachedTilePorts + nUncachedTilePorts + nExtClients == 0)
io.mem.foreach { tl =>
tl.acquire.valid := Bool(false)
tl.grant.ready := Bool(false)
}
io.mmio.acquire.valid := Bool(false)
io.mmio.grant.ready := Bool(false)
}
/** The whole outer memory hierarchy, including a NoC, some kind of coherence
* manager agent, and a converter from TileLink to MemIO.
*/
class DefaultOuterMemorySystem(implicit p: Parameters) extends OuterMemorySystem()(p) {
// Create a simple L1toL2 NoC between the tiles and the banks of outer memory
// Cached ports come first in the client list, making sharerToClientId just an identity function
// addrToBank is used to hash physical addresses (of cache blocks) to banks (and thereby memory channels)
def sharerToClientId(sharerId: UInt) = sharerId
def addrToBank(addr: UInt): UInt = {
val isMemory = p(GlobalAddrMap).isInRegion("mem", addr << log2Up(p(CacheBlockBytes)))
Mux(isMemory,
if (nBanks > 1) addr(lsb + log2Up(nBanks) - 1, lsb) else UInt(0),
UInt(nBanks))
}
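// e.g. with nBanks = 2 and BankIdLSB = 0, bit 0 of the block address selects
// the bank, while any non-memory (MMIO) address maps to the extra index
// nBanks, which the crossbar routes to mmioManager's port below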
val preBuffering = TileLinkDepths(1,1,2,2,0)
val l1tol2net = Module(new PortedTileLinkCrossbar(addrToBank, sharerToClientId, preBuffering))
// Create point(s) of coherence serialization
val managerEndpoints = List.tabulate(nBanks){id => p(BuildL2CoherenceManager)(id, p)}
managerEndpoints.foreach { _.incoherent := io.incoherent }
val mmioManager = Module(new MMIOTileLinkManager()(p.alterPartial({
case TLId => "L1toL2"
case InnerTLId => "L1toL2"
case OuterTLId => "L2toMMIO"
})))
io.mmio <> mmioManager.io.outer
// Wire the tiles to the TileLink client ports of the L1toL2 network,
// and coherence manager(s) to the other side
l1tol2net.io.clients_cached <> io.tiles_cached
l1tol2net.io.clients_uncached <> io.tiles_uncached ++ io.ext_uncached
l1tol2net.io.managers <> managerEndpoints.map(_.innerTL) :+ mmioManager.io.inner
// Create a converter between TileLinkIO and MemIO for each channel
val outerTLParams = p.alterPartial({ case TLId => "L2toMC" })
val backendBuffering = TileLinkDepths(0,0,0,0,0)
// TODO: the code to print this stuff should live somewhere else
println("Generated Address Map")
for (entry <- p(GlobalAddrMap).flatten) {
val name = entry.name
val start = entry.region.start
val end = entry.region.start + entry.region.size - 1
println(f"\t$name%s $start%x - $end%x")
}
println("Generated Configuration String")
println(new String(p(ConfigString)))
val mem_ic = Module(new TileLinkMemoryInterconnect(nBanksPerMemChannel, nMemChannels)(outermostParams))
for ((bank, icPort) <- managerEndpoints zip mem_ic.io.in) {
val unwrap = Module(new ClientTileLinkIOUnwrapper()(outerTLParams))
unwrap.io.in <> ClientTileLinkEnqueuer(bank.outerTL, backendBuffering)(outerTLParams)
TileLinkWidthAdapter(icPort, unwrap.io.out)
}
io.mem <> mem_ic.io.out
}
abstract class Coreplex(implicit val p: Parameters) extends Module
with HasCoreplexParameters {
class CoreplexIO(implicit val p: Parameters) extends Bundle {
val mem = Vec(nMemChannels, new ClientUncachedTileLinkIO()(outermostParams))
val ext_clients = Vec(nExtClients, new ClientUncachedTileLinkIO()(innerParams)).flip
val mmio = if(p(ExportMMIOPort)) Some(new ClientUncachedTileLinkIO()(outermostMMIOParams)) else None
val interrupts = Vec(p(NExtInterrupts), Bool()).asInput
val debug = new DebugBusIO()(p).flip
val extra = p(ExtraCoreplexPorts)(p)
val success: Option[Bool] = if (hasSuccessFlag) Some(Bool(OUTPUT)) else None
}
def hasSuccessFlag: Boolean = false
val io = new CoreplexIO
}
class DefaultCoreplex(topParams: Parameters) extends Coreplex()(topParams) {
// Build an Uncore and a set of Tiles
val tileResets = Wire(Vec(nTiles, Bool()))
val tileList = p(BuildTiles).zip(tileResets).map {
case (tile, rst) => tile(rst, p)
}
val nCachedPorts = tileList.map(tile => tile.io.cached.size).reduce(_ + _)
val nUncachedPorts = tileList.map(tile => tile.io.uncached.size).reduce(_ + _)
val innerTLParams = p.alterPartial({
case HastiId => "TL"
case TLId => "L1toL2"
case NCachedTileLinkPorts => nCachedPorts
case NUncachedTileLinkPorts => nUncachedPorts
})
val uncore = Module(new Uncore()(innerTLParams))
(uncore.io.prci, tileResets, tileList).zipped.foreach {
case (prci, rst, tile) =>
rst := prci.reset
tile.io.prci <> prci
}
// Connect the uncore to the tile memory ports, HostIO and MemIO
uncore.io.tiles_cached <> tileList.map(_.io.cached).flatten
uncore.io.tiles_uncached <> tileList.map(_.io.uncached).flatten
uncore.io.interrupts <> io.interrupts
uncore.io.debug <> io.debug
uncore.io.ext_uncached <> io.ext_clients
if (exportMMIO) { io.mmio.get <> uncore.io.mmio.get }
io.mem <> uncore.io.mem
}
class GroundTestCoreplex(topParams: Parameters) extends DefaultCoreplex(topParams) {
override def hasSuccessFlag = true
io.success.get := tileList.flatMap(_.io.elements get "success").map(_.asInstanceOf[Bool]).reduce(_&&_)
}


@@ -0,0 +1,62 @@
package coreplex
import Chisel._
import cde.{Parameters, Field}
import groundtest._
import uncore.tilelink._
import uncore.agents._
case object ExportGroundTestStatus extends Field[Boolean]
class DirectGroundTestCoreplex(topParams: Parameters) extends Coreplex()(topParams) {
// Not using the debug port, so tie off its request/response channels
io.debug.req.ready := Bool(false)
io.debug.resp.valid := Bool(false)
require(!exportMMIO)
require(nExtClients == 0)
require(nMemChannels == 1)
require(nTiles == 1)
val test = p(BuildGroundTest)(outermostParams.alterPartial({
case GroundTestId => 0
case CacheName => "L1D"
}))
require(test.io.cache.size == 0)
require(test.io.mem.size == nBanksPerMemChannel)
require(test.io.ptw.size == 0)
val mem_ic = Module(new TileLinkMemoryInterconnect(
nBanksPerMemChannel, nMemChannels)(outermostParams))
mem_ic.io.in <> test.io.mem
io.mem <> mem_ic.io.out
if (p(ExportGroundTestStatus)) {
val status = io.extra.asInstanceOf[GroundTestStatus]
val s_running :: s_finished :: s_errored :: s_timeout :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_running)
val error_code = Reg(status.error.bits)
val timeout_code = Reg(status.timeout.bits)
when (state === s_running) {
when (test.io.status.finished) { state := s_finished }
when (test.io.status.error.valid) {
state := s_errored
error_code := test.io.status.error.bits
}
when (test.io.status.timeout.valid) {
state := s_timeout
timeout_code := test.io.status.timeout.bits
}
}
status.finished := (state === s_finished)
status.error.valid := (state === s_errored)
status.error.bits := error_code
status.timeout.valid := (state === s_timeout)
status.timeout.bits := timeout_code
}
override def hasSuccessFlag = true
io.success.get := test.io.status.finished
}


@@ -0,0 +1,200 @@
package coreplex
import Chisel._
import groundtest._
import rocket._
import uncore.tilelink._
import uncore.coherence._
import uncore.agents._
import uncore.devices.NTiles
import uncore.unittests._
import junctions._
import junctions.unittests._
import scala.collection.mutable.LinkedHashSet
import cde.{Parameters, Config, Dump, Knob, CDEMatchError}
import scala.math.max
import ConfigUtils._
class WithComparator extends Config(
(pname, site, here) => pname match {
case GroundTestKey => Seq.fill(site(NTiles)) {
GroundTestTileSettings(uncached = site(ComparatorKey).targets.size)
}
case BuildGroundTest =>
(p: Parameters) => Module(new ComparatorCore()(p))
case ComparatorKey => ComparatorParameters(
targets = Seq("mem", "io:ext:testram").map(name =>
site(GlobalAddrMap)(name).start.longValue),
width = 8,
operations = 1000,
atomics = site(UseAtomics),
prefetches = site("COMPARATOR_PREFETCHES"))
case FPUKey => None
case UseAtomics => false
case "COMPARATOR_PREFETCHES" => false
case _ => throw new CDEMatchError
})
class WithAtomics extends Config(
(pname, site, here) => pname match {
case UseAtomics => true
case _ => throw new CDEMatchError
})
class WithPrefetches extends Config(
(pname, site, here) => pname match {
case "COMPARATOR_PREFETCHES" => true
case _ => throw new CDEMatchError
})
class WithMemtest extends Config(
(pname, site, here) => pname match {
case GroundTestKey => Seq.fill(site(NTiles)) {
GroundTestTileSettings(1, 1)
}
case GeneratorKey => GeneratorParameters(
maxRequests = 128,
startAddress = site(GlobalAddrMap)("mem").start)
case BuildGroundTest =>
(p: Parameters) => Module(new GeneratorTest()(p))
case _ => throw new CDEMatchError
})
class WithNGenerators(nUncached: Int, nCached: Int) extends Config(
(pname, site, here) => pname match {
case GroundTestKey => Seq.fill(site(NTiles)) {
GroundTestTileSettings(nUncached, nCached)
}
case _ => throw new CDEMatchError
})
class WithCacheFillTest extends Config(
(pname, site, here) => pname match {
case GroundTestKey => Seq.fill(site(NTiles)) {
GroundTestTileSettings(uncached = 1)
}
case BuildGroundTest =>
(p: Parameters) => Module(new CacheFillTest()(p))
case _ => throw new CDEMatchError
},
knobValues = {
case "L2_WAYS" => 4
case "L2_CAPACITY_IN_KB" => 4
case _ => throw new CDEMatchError
})
class WithBroadcastRegressionTest extends Config(
(pname, site, here) => pname match {
case GroundTestKey => Seq.fill(site(NTiles)) {
GroundTestTileSettings(1, 1, maxXacts = 3)
}
case BuildGroundTest =>
(p: Parameters) => Module(new RegressionTest()(p))
case GroundTestRegressions =>
(p: Parameters) => RegressionTests.broadcastRegressions(p)
case _ => throw new CDEMatchError
})
class WithCacheRegressionTest extends Config(
(pname, site, here) => pname match {
case GroundTestKey => Seq.fill(site(NTiles)) {
GroundTestTileSettings(1, 1, maxXacts = 5)
}
case BuildGroundTest =>
(p: Parameters) => Module(new RegressionTest()(p))
case GroundTestRegressions =>
(p: Parameters) => RegressionTests.cacheRegressions(p)
case _ => throw new CDEMatchError
})
class WithNastiConverterTest extends Config(
(pname, site, here) => pname match {
case GroundTestKey => Seq.fill(site(NTiles)) {
GroundTestTileSettings(uncached = 1)
}
case GeneratorKey => GeneratorParameters(
maxRequests = 128,
startAddress = site(GlobalAddrMap)("mem").start)
case BuildGroundTest =>
(p: Parameters) => Module(new NastiConverterTest()(p))
case _ => throw new CDEMatchError
})
class WithTraceGen extends Config(
topDefinitions = (pname, site, here) => pname match {
case GroundTestKey => Seq.fill(site(NTiles)) {
GroundTestTileSettings(uncached = 1, cached = 1)
}
case BuildGroundTest =>
(p: Parameters) => Module(new GroundTestTraceGenerator()(p))
case GeneratorKey => GeneratorParameters(
maxRequests = 256,
startAddress = 0)
case AddressBag => {
val nSets = 32 // L2 NSets
val nWays = 1
val blockOffset = site(CacheBlockOffsetBits)
val baseAddr = site(GlobalAddrMap)("mem").start
val nBeats = site(MIFDataBeats)
List.tabulate(4 * nWays) { i =>
Seq.tabulate(nBeats) { j => (j * 8) + ((i * nSets) << blockOffset) }
}.flatten.map(addr => baseAddr + BigInt(addr))
}
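// Note: these 4 * nWays block addresses sit nSets blocks apart, so they all
// alias to the same set of the 32-set L2 assumed above and repeatedly evict
// one another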
case UseAtomics => true
case _ => throw new CDEMatchError
},
knobValues = {
case "L1D_SETS" => 16
case "L1D_WAYS" => 1
})
class WithPCIeMockupTest extends Config(
(pname, site, here) => pname match {
case NTiles => 2
case GroundTestKey => Seq(
GroundTestTileSettings(1, 1),
GroundTestTileSettings(1))
case GeneratorKey => GeneratorParameters(
maxRequests = 128,
startAddress = site(GlobalAddrMap)("mem").start)
case BuildGroundTest =>
(p: Parameters) => {
val id = p(GroundTestId)
if (id == 0) Module(new GeneratorTest()(p))
else Module(new NastiConverterTest()(p))
}
case _ => throw new CDEMatchError
})
class WithDirectMemtest extends Config(
(pname, site, here) => {
val nGens = 8
pname match {
case GroundTestKey => Seq(GroundTestTileSettings(uncached = nGens))
case GeneratorKey => GeneratorParameters(
maxRequests = 1024,
startAddress = 0)
case BuildGroundTest =>
(p: Parameters) => Module(new GeneratorTest()(p))
case _ => throw new CDEMatchError
}
})
class WithDirectComparator extends Config(
(pname, site, here) => pname match {
case GroundTestKey => Seq.fill(site(NTiles)) {
GroundTestTileSettings(uncached = site(ComparatorKey).targets.size)
}
case BuildGroundTest =>
(p: Parameters) => Module(new ComparatorCore()(p))
case ComparatorKey => ComparatorParameters(
targets = Seq(0L, 0x100L),
width = 8,
operations = 1000,
atomics = site(UseAtomics),
prefetches = site("COMPARATOR_PREFETCHES"))
case FPUKey => None
case UseAtomics => false
case "COMPARATOR_PREFETCHES" => false
case _ => throw new CDEMatchError
})


@@ -0,0 +1,186 @@
// See LICENSE for license details.
package coreplex
import Chisel._
import scala.collection.mutable.{LinkedHashSet,LinkedHashMap}
import cde.{Parameters, ParameterDump, Config, Field, CDEMatchError}
case object RegressionTestNames extends Field[LinkedHashSet[String]]
abstract class RocketTestSuite {
val dir: String
val makeTargetName: String
val names: LinkedHashSet[String]
val envName: String
def postScript = s"""
$$(addprefix $$(output_dir)/, $$(addsuffix .hex, $$($makeTargetName))): $$(output_dir)/%.hex: $dir/%.hex
\tmkdir -p $$(output_dir)
\tln -fs $$< $$@
$$(addprefix $$(output_dir)/, $$($makeTargetName)): $$(output_dir)/%: $dir/%
\tmkdir -p $$(output_dir)
\tln -fs $$< $$@
run-$makeTargetName: $$(addprefix $$(output_dir)/, $$(addsuffix .out, $$($makeTargetName)))
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
run-$makeTargetName-debug: $$(addprefix $$(output_dir)/, $$(addsuffix .vpd, $$($makeTargetName)))
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$(patsubst %.vpd,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
"""
}
class AssemblyTestSuite(prefix: String, val names: LinkedHashSet[String])(val envName: String) extends RocketTestSuite {
val dir = "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/isa"
val makeTargetName = prefix + "-" + envName + "-asm-tests"
override def toString = s"$makeTargetName = \\\n" + names.map(n => s"\t$prefix-$envName-$n").mkString(" \\\n") + postScript
}
class BenchmarkTestSuite(makePrefix: String, val dir: String, val names: LinkedHashSet[String]) extends RocketTestSuite {
val envName = ""
val makeTargetName = makePrefix + "-bmark-tests"
override def toString = s"$makeTargetName = \\\n" + names.map(n => s"\t$n.riscv").mkString(" \\\n") + postScript
}
class RegressionTestSuite(val names: LinkedHashSet[String]) extends RocketTestSuite {
val envName = ""
val dir = "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/isa"
val makeTargetName = "regression-tests"
override def toString = s"$makeTargetName = \\\n" + names.mkString(" \\\n")
}
object TestGeneration {
val asmSuites = new LinkedHashMap[String,AssemblyTestSuite]()
val bmarkSuites = new LinkedHashMap[String,BenchmarkTestSuite]()
val regressionSuites = new LinkedHashMap[String,RegressionTestSuite]()
def addSuite(s: RocketTestSuite) {
s match {
case a: AssemblyTestSuite => asmSuites += (a.makeTargetName -> a)
case b: BenchmarkTestSuite => bmarkSuites += (b.makeTargetName -> b)
case r: RegressionTestSuite => regressionSuites += (r.makeTargetName -> r)
}
}
def addSuites(s: Seq[RocketTestSuite]) { s.foreach(addSuite) }
def generateMakefrag(topModuleName: String, configClassName: String) {
def gen(kind: String, s: Seq[RocketTestSuite]) = {
if(s.length > 0) {
val envs = s.groupBy(_.envName)
val targets = s.map(t => s"$$(${t.makeTargetName})").mkString(" ")
s.map(_.toString).mkString("\n") +
envs.filterKeys(_ != "").map( {
case (env,envsuites) => {
val suites = envsuites.map(t => s"$$(${t.makeTargetName})").mkString(" ")
s"""
run-$kind-$env-tests: $$(addprefix $$(output_dir)/, $$(addsuffix .out, $suites))
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
run-$kind-$env-tests-debug: $$(addprefix $$(output_dir)/, $$(addsuffix .vpd, $suites))
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$(patsubst %.vpd,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
run-$kind-$env-tests-fast: $$(addprefix $$(output_dir)/, $$(addsuffix .run, $suites))
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
"""} } ).mkString("\n") + s"""
run-$kind-tests: $$(addprefix $$(output_dir)/, $$(addsuffix .out, $targets))
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
run-$kind-tests-debug: $$(addprefix $$(output_dir)/, $$(addsuffix .vpd, $targets))
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$(patsubst %.vpd,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
run-$kind-tests-fast: $$(addprefix $$(output_dir)/, $$(addsuffix .run, $targets))
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
"""
} else { "\n" }
}
val f = createOutputFile(s"$topModuleName.$configClassName.d")
f.write(
List(
gen("asm", asmSuites.values.toSeq),
gen("bmark", bmarkSuites.values.toSeq),
gen("regression", regressionSuites.values.toSeq)
).mkString("\n"))
f.close
}
def createOutputFile(name: String) =
new java.io.FileWriter(s"${Driver.targetDir}/$name")
}
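// A hypothetical usage sketch (not part of this commit): suites are normally
// registered as a side effect of config elaboration (see BuildTiles in the
// configs above), after which the build flow emits the makefrag. The module
// and config names here are illustrative.
object ExampleMakefragGen {
  def main(args: Array[String]) {
    TestGeneration.addSuites(List("p", "v").map(DefaultTestSuites.rv64ui))
    TestGeneration.addSuite(DefaultTestSuites.benchmarks)
    TestGeneration.generateMakefrag("TestHarness", "DefaultConfig")
  }
}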
object DefaultTestSuites {
val rv32uiNames = LinkedHashSet(
"simple", "add", "addi", "and", "andi", "auipc", "beq", "bge", "bgeu", "blt", "bltu", "bne", "fence_i",
"jal", "jalr", "lb", "lbu", "lh", "lhu", "lui", "lw", "or", "ori", "sb", "sh", "sw", "sll", "slli",
"slt", "slti", "sra", "srai", "srl", "srli", "sub", "xor", "xori")
val rv32ui = new AssemblyTestSuite("rv32ui", rv32uiNames)(_)
val rv32ucNames = LinkedHashSet("rvc")
val rv32uc = new AssemblyTestSuite("rv32uc", rv32ucNames)(_)
val rv32umNames = LinkedHashSet("mul", "mulh", "mulhsu", "mulhu", "div", "divu", "rem", "remu")
val rv32um = new AssemblyTestSuite("rv32um", rv32umNames)(_)
val rv32uaNames = LinkedHashSet("lrsc", "amoadd_w", "amoand_w", "amoor_w", "amoxor_w", "amoswap_w", "amomax_w", "amomaxu_w", "amomin_w", "amominu_w")
val rv32ua = new AssemblyTestSuite("rv32ua", rv32uaNames)(_)
val rv32siNames = LinkedHashSet("csr", "ma_fetch", "scall", "sbreak", "wfi", "dirty")
val rv32si = new AssemblyTestSuite("rv32si", rv32siNames)(_)
val rv32miNames = LinkedHashSet("csr", "mcsr", "illegal", "ma_addr", "ma_fetch", "sbreak", "scall")
val rv32mi = new AssemblyTestSuite("rv32mi", rv32miNames)(_)
val rv32u = List(rv32ui, rv32um)
val rv32i = List(rv32ui, rv32si, rv32mi)
val rv32pi = List(rv32ui, rv32mi)
val rv64uiNames = LinkedHashSet("addw", "addiw", "ld", "lwu", "sd", "slliw", "sllw", "sltiu", "sltu", "sraiw", "sraw", "srliw", "srlw", "subw")
val rv64ui = new AssemblyTestSuite("rv64ui", rv32uiNames ++ rv64uiNames)(_)
val rv64umNames = LinkedHashSet("divuw", "divw", "mulw", "remuw", "remw")
val rv64um = new AssemblyTestSuite("rv64um", rv32umNames ++ rv64umNames)(_)
val rv64uaNames = rv32uaNames.map(_.replaceAll("_w","_d"))
val rv64ua = new AssemblyTestSuite("rv64ua", rv32uaNames ++ rv64uaNames)(_)
val rv64ucNames = rv32ucNames
val rv64uc = new AssemblyTestSuite("rv64uc", rv64ucNames)(_)
val rv64ufNames = LinkedHashSet("ldst", "move", "fsgnj", "fcmp", "fcvt", "fcvt_w", "fclass", "fadd", "fdiv", "fmin", "fmadd")
val rv64uf = new AssemblyTestSuite("rv64uf", rv64ufNames)(_)
val rv64ufNoDiv = new AssemblyTestSuite("rv64uf", rv64ufNames - "fdiv")(_)
val rv64udNames = rv64ufNames + "structural"
val rv64ud = new AssemblyTestSuite("rv64ud", rv64udNames)(_)
val rv64udNoDiv = new AssemblyTestSuite("rv64ud", rv64udNames - "fdiv")(_)
val rv64siNames = rv32siNames
val rv64si = new AssemblyTestSuite("rv64si", rv64siNames)(_)
val rv64miNames = rv32miNames + "breakpoint"
val rv64mi = new AssemblyTestSuite("rv64mi", rv64miNames)(_)
val groundtestNames = LinkedHashSet("simple")
val groundtest64 = new AssemblyTestSuite("rv64ui", groundtestNames)(_)
val groundtest32 = new AssemblyTestSuite("rv32ui", groundtestNames)(_)
// TODO: "rv64ui-pm-lrsc", "rv64mi-pm-ipi",
val rv64u = List(rv64ui, rv64um)
val rv64i = List(rv64ui, rv64si, rv64mi)
val rv64pi = List(rv64ui, rv64mi)
val benchmarks = new BenchmarkTestSuite("rvi", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet(
"median", "multiply", "qsort", "towers", "vvadd", "dhrystone", "mt-matmul"))
val rv32udBenchmarks = new BenchmarkTestSuite("rvd", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet(
"mm", "spmv", "mt-vvadd"))
val emptyBmarks = new BenchmarkTestSuite("empty",
"$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet.empty)
val mtBmarks = new BenchmarkTestSuite("mt", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/mt",
LinkedHashSet(((0 to 4).map("vvadd"+_) ++
List("ad","ae","af","ag","ai","ak","al","am","an","ap","aq","ar","at","av","ay","az",
"bb","bc","bf","bh","bj","bk","bm","bo","br","bs","ce","cf","cg","ci","ck","cl",
"cm","cs","cv","cy","dc","df","dm","do","dr","ds","du","dv").map(_+"_matmul")): _*))
}


@@ -0,0 +1,22 @@
package coreplex
import Chisel._
import junctions.unittests.UnitTestSuite
import rocket.Tile
import uncore.tilelink.TLId
import cde.Parameters
class UnitTestCoreplex(topParams: Parameters) extends Coreplex()(topParams) {
require(!exportMMIO)
require(nExtClients == 0)
require(nMemChannels == 0)
io.debug.req.ready := Bool(false)
io.debug.resp.valid := Bool(false)
val l1params = p.alterPartial({ case TLId => "L1toL2" })
val tests = Module(new UnitTestSuite()(l1params))
override def hasSuccessFlag = true
io.success.get := tests.io.finished
}


@@ -0,0 +1,115 @@
package groundtest
import Chisel._
import uncore.tilelink._
import uncore.agents._
import uncore.coherence.{InnerTLId, OuterTLId}
import uncore.util._
import junctions.HasAddrMapParameters
import cde.Parameters
/**
* An example bus-mastering device that writes some preset data to memory.
* When it receives an MMIO put request, it starts writing out the data.
* When it receives an MMIO get request, it responds with the progress of
* the write. A grant data of 1 means it is still writing, grant data 0
* means it has finished.
*/
class ExampleBusMaster(implicit val p: Parameters) extends Module
with HasAddrMapParameters
with HasTileLinkParameters {
val mmioParams = p.alterPartial({ case TLId => p(InnerTLId) })
val memParams = p.alterPartial({ case TLId => p(OuterTLId) })
val memStart = addrMap("mem").start
val memStartBlock = memStart >> p(CacheBlockOffsetBits)
val io = new Bundle {
val mmio = new ClientUncachedTileLinkIO()(mmioParams).flip
val mem = new ClientUncachedTileLinkIO()(memParams)
}
val s_idle :: s_put :: s_resp :: Nil = Enum(Bits(), 3)
val state = Reg(init = s_idle)
val send_resp = Reg(init = Bool(false))
val r_acq = Reg(new AcquireMetadata)
io.mmio.acquire.ready := !send_resp
io.mmio.grant.valid := send_resp
io.mmio.grant.bits := Grant(
is_builtin_type = Bool(true),
g_type = r_acq.getBuiltInGrantType(),
client_xact_id = r_acq.client_xact_id,
manager_xact_id = UInt(0),
addr_beat = r_acq.addr_beat,
data = Mux(state === s_idle, UInt(0), UInt(1)))
when (io.mmio.acquire.fire()) {
send_resp := Bool(true)
r_acq := io.mmio.acquire.bits
when (state === s_idle && io.mmio.acquire.bits.hasData()) { state := s_put }
}
when (io.mmio.grant.fire()) { send_resp := Bool(false) }
val (put_beat, put_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
when (put_done) { state := s_resp }
when (io.mem.grant.fire()) { state := s_idle }
io.mem.acquire.valid := state === s_put
io.mem.acquire.bits := PutBlock(
client_xact_id = UInt(0),
addr_block = UInt(memStartBlock),
addr_beat = put_beat,
data = put_beat)
io.mem.grant.ready := state === s_resp
}
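// BusMasterTest below drives the protocol described above: an MMIO Put of 1
// starts the master, Gets poll until the returned data is 0, and a final
// GetBlock verifies the data the master wrote to memory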
class BusMasterTest(implicit p: Parameters) extends GroundTest()(p)
with HasTileLinkParameters {
val (s_idle :: s_req_start :: s_resp_start :: s_req_poll :: s_resp_poll ::
s_req_check :: s_resp_check :: s_done :: Nil) = Enum(Bits(), 8)
val state = Reg(init = s_idle)
val busMasterBlock = addrMap("io:ext:busmaster").start >> p(CacheBlockOffsetBits)
val start_acq = Put(
client_xact_id = UInt(0),
addr_block = UInt(busMasterBlock),
addr_beat = UInt(0),
data = UInt(1))
val poll_acq = Get(
client_xact_id = UInt(0),
addr_block = UInt(busMasterBlock),
addr_beat = UInt(0))
val check_acq = GetBlock(
client_xact_id = UInt(0),
addr_block = UInt(memStartBlock))
val acq = io.mem.head.acquire
val gnt = io.mem.head.grant
acq.valid := state.isOneOf(s_req_start, s_req_poll, s_req_check)
acq.bits := MuxLookup(state, check_acq, Seq(
s_req_start -> start_acq,
s_req_poll -> poll_acq))
gnt.ready := state.isOneOf(s_resp_start, s_resp_poll, s_resp_check)
val (get_beat, get_done) = Counter(
state === s_resp_check && gnt.valid, tlDataBeats)
when (state === s_idle) { state := s_req_start }
when (state === s_req_start && acq.ready) { state := s_resp_start }
when (state === s_resp_start && gnt.valid) { state := s_req_poll }
when (state === s_req_poll && acq.ready) { state := s_resp_poll }
when (state === s_resp_poll && gnt.valid) {
when (gnt.bits.data === UInt(0)) {
state := s_req_check
} .otherwise { state := s_req_poll }
}
when (state === s_req_check && acq.ready) { state := s_resp_check }
when (get_done) { state := s_done }
io.status.finished := state === s_done
assert(state =/= s_resp_check || !gnt.valid ||
gnt.bits.data === get_beat,
"BusMasterTest: data does not match")
}


@@ -0,0 +1,50 @@
package groundtest
import Chisel._
import uncore.tilelink._
import uncore.constants._
import uncore.agents._
import uncore.util._
import cde.{Parameters, Field}
class CacheFillTest(implicit p: Parameters) extends GroundTest()(p)
with HasTileLinkParameters {
val capacityKb: Int = p("L2_CAPACITY_IN_KB")
val nblocks = capacityKb * 1024 / p(CacheBlockBytes)
val s_start :: s_prefetch :: s_retrieve :: s_finished :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_start)
val active = state.isOneOf(s_prefetch, s_retrieve)
val xact_pending = Reg(init = UInt(0, tlMaxClientXacts))
val xact_id = PriorityEncoder(~xact_pending)
val (req_block, round_done) = Counter(io.mem.head.acquire.fire(), nblocks)
io.mem.head.acquire.valid := active && !xact_pending.andR
io.mem.head.acquire.bits := Mux(state === s_prefetch,
GetPrefetch(xact_id, UInt(memStartBlock) + req_block),
GetBlock(xact_id, UInt(memStartBlock) + req_block))
io.mem.head.grant.ready := xact_pending.orR
def add_pending(acq: DecoupledIO[Acquire]): UInt =
Mux(acq.fire(), UIntToOH(acq.bits.client_xact_id), UInt(0))
def remove_pending(gnt: DecoupledIO[Grant]): UInt = {
val last_grant = !gnt.bits.hasMultibeatData() ||
gnt.bits.addr_beat === UInt(tlDataBeats - 1)
~Mux(gnt.fire() && last_grant, UIntToOH(gnt.bits.client_xact_id), UInt(0))
}
xact_pending := (xact_pending |
add_pending(io.mem.head.acquire)) &
remove_pending(io.mem.head.grant)
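// xact_pending acts as a transaction scoreboard: a bit is set when its
// acquire fires and cleared once the last grant beat for that xact returns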
when (state === s_start) { state := s_prefetch }
when (state === s_prefetch && round_done) { state := s_retrieve }
when (state === s_retrieve && round_done) { state := s_finished }
io.status.finished := (state === s_finished)
io.status.timeout.valid := Bool(false)
io.status.error.valid := Bool(false)
}


@@ -0,0 +1,387 @@
package groundtest
import Chisel._
import uncore.tilelink._
import uncore.constants._
import junctions._
import rocket._
import scala.util.Random
import cde.{Parameters, Field}
case class ComparatorParameters(
targets: Seq[Long],
width: Int,
operations: Int,
atomics: Boolean,
prefetches: Boolean)
case object ComparatorKey extends Field[ComparatorParameters]
trait HasComparatorParameters {
implicit val p: Parameters
val comparatorParams = p(ComparatorKey)
val targets = comparatorParams.targets
val nTargets = targets.size
val targetWidth = comparatorParams.width
val nOperations = comparatorParams.operations
val atomics = comparatorParams.atomics
val prefetches = comparatorParams.prefetches
}
object LFSR64
{
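// A 64-bit maximal-length Fibonacci LFSR (taps 64, 63, 61, 60); the
// elaboration-time counter below seeds each instance differently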
private var counter = 0
private def next: Int = {
counter += 1
counter
}
def apply(increment: Bool = Bool(true), seed: Int = next): UInt =
{
val wide = 64
val lfsr = RegInit(UInt((seed * 0xDEADBEEFCAFEBAB1L) >>> 1, width = wide))
val xor = lfsr(0) ^ lfsr(1) ^ lfsr(3) ^ lfsr(4)
when (increment) { lfsr := Cat(xor, lfsr(wide-1,1)) }
lfsr
}
}
object NoiseMaker
{
def apply(wide: Int, increment: Bool = Bool(true)): UInt = {
val lfsrs = Seq.fill((wide+63)/64) { LFSR64(increment) }
Cat(lfsrs)(wide-1,0)
}
}
object MaskMaker
{
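// e.g. MaskMaker(8, UInt(3)) yields b00000111: ones in all positions below `bits`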
def apply(wide: Int, bits: UInt): UInt =
Vec.tabulate(wide) {UInt(_) < bits} .asUInt
}
class ComparatorSource(implicit val p: Parameters) extends Module
with HasComparatorParameters
with HasTileLinkParameters
{
val io = new Bundle {
val out = Decoupled(new Acquire)
val finished = Bool(OUTPUT)
}
// Output exactly nOperations of Acquires
val finished = RegInit(Bool(false))
val valid = RegInit(Bool(false))
valid := Bool(true)
io.finished := finished
io.out.valid := !finished && valid
// Generate random operand sizes
val inc = io.out.fire()
val raw_operand_size = NoiseMaker(2, inc) | UInt(0, M_SZ)
val max_operand_size = UInt(log2Up(tlDataBytes))
val get_operand_size = Mux(raw_operand_size > max_operand_size, max_operand_size, raw_operand_size)
val atomic_operand_size = UInt(2) + NoiseMaker(1, inc) // word or dword
// Generate random, but valid addr_bytes
val raw_addr_byte = NoiseMaker(tlByteAddrBits, inc)
val get_addr_byte = raw_addr_byte & ~MaskMaker(tlByteAddrBits, get_operand_size)
val atomic_addr_byte = raw_addr_byte & ~MaskMaker(tlByteAddrBits, atomic_operand_size)
// Only allow some of the possible choices (M_XA_MAXU untested)
val atomic_opcode = MuxLookup(NoiseMaker(3, inc), M_XA_SWAP, Array(
UInt("b000") -> M_XA_ADD,
UInt("b001") -> M_XA_XOR,
UInt("b010") -> M_XA_OR,
UInt("b011") -> M_XA_AND,
UInt("b100") -> M_XA_MIN,
UInt("b101") -> M_XA_MAX,
UInt("b110") -> M_XA_MINU,
UInt("b111") -> M_XA_SWAP))
// Addr_block range
val addr_block_mask = MaskMaker(tlBlockAddrBits, UInt(targetWidth-tlBeatAddrBits-tlByteAddrBits))
// Generate some random values
val addr_block = NoiseMaker(tlBlockAddrBits, inc) & addr_block_mask
val addr_beat = NoiseMaker(tlBeatAddrBits, inc)
val wmask = NoiseMaker(tlDataBytes, inc)
val data = NoiseMaker(tlDataBits, inc)
val client_xact_id = UInt(0) // filled by Client
// Random transactions
val get = Get(client_xact_id, addr_block, addr_beat, get_addr_byte, get_operand_size, Bool(false))
val getBlock = GetBlock(client_xact_id, addr_block)
val put = Put(client_xact_id, addr_block, addr_beat, data, Some(wmask))
val putBlock = PutBlock(client_xact_id, addr_block, UInt(0), data)
val putAtomic = if (atomics)
PutAtomic(client_xact_id, addr_block, addr_beat,
atomic_addr_byte, atomic_opcode, atomic_operand_size, data)
else put
val putPrefetch = if (prefetches)
PutPrefetch(client_xact_id, addr_block)
else put
val getPrefetch = if (prefetches)
GetPrefetch(client_xact_id, addr_block)
else get
val a_type_sel = NoiseMaker(3, inc)
// We must initially putBlock all of memory to have a consistent starting state
val final_addr_block = addr_block_mask + UInt(1)
val wipe_addr_block = RegInit(UInt(0, width = tlBlockAddrBits))
val done_wipe = wipe_addr_block === final_addr_block
io.out.bits := Mux(!done_wipe,
// Override whatever else we were going to do if we are wiping
PutBlock(client_xact_id, wipe_addr_block, UInt(0), data),
// Generate a random a_type
MuxLookup(a_type_sel, get, Array(
UInt("b000") -> get,
UInt("b001") -> getBlock,
UInt("b010") -> put,
UInt("b011") -> putBlock,
UInt("b100") -> putAtomic,
UInt("b101") -> getPrefetch,
UInt("b110") -> putPrefetch)))
val idx = Reg(init = UInt(0, log2Up(nOperations)))
when (io.out.fire()) {
when (idx === UInt(nOperations - 1)) { finished := Bool(true) }
when (!done_wipe) {
printf("[acq %d]: PutBlock(addr_block = %x, data = %x)\n",
idx, wipe_addr_block, data)
wipe_addr_block := wipe_addr_block + UInt(1)
} .otherwise {
switch (a_type_sel) {
is (UInt("b000")) {
printf("[acq %d]: Get(addr_block = %x, addr_beat = %x, addr_byte = %x, op_size = %x)\n",
idx, addr_block, addr_beat, get_addr_byte, get_operand_size)
}
is (UInt("b001")) {
printf("[acq %d]: GetBlock(addr_block = %x)\n", idx, addr_block)
}
is (UInt("b010")) {
printf("[acq %d]: Put(addr_block = %x, addr_beat = %x, data = %x, wmask = %x)\n",
idx, addr_block, addr_beat, data, wmask)
}
is (UInt("b011")) {
printf("[acq %d]: PutBlock(addr_block = %x, data = %x)\n", idx, addr_block, data)
}
is (UInt("b100")) {
if (atomics) {
printf("[acq %d]: PutAtomic(addr_block = %x, addr_beat = %x, addr_byte = %x, " +
"opcode = %x, op_size = %x, data = %x)\n",
idx, addr_block, addr_beat, atomic_addr_byte,
atomic_opcode, atomic_operand_size, data)
} else {
printf("[acq %d]: Put(addr_block = %x, addr_beat = %x, data = %x, wmask = %x)\n",
idx, addr_block, addr_beat, data, wmask)
}
}
is (UInt("b101")) {
if (prefetches) {
printf("[acq %d]: GetPrefetch(addr_block = %x)\n", idx, addr_block)
} else {
printf("[acq %d]: Get(addr_block = %x, addr_beat = %x, addr_byte = %x, op_size = %x)\n",
idx, addr_block, addr_beat, get_addr_byte, get_operand_size)
}
}
is (UInt("b110")) {
if (prefetches) {
printf("[acq %d]: PutPrefetch(addr_block = %x)\n", idx, addr_block)
} else {
printf("[acq %d]: Put(addr_block = %x, addr_beat = %x, data = %x, wmask = %x)\n",
idx, addr_block, addr_beat, data, wmask)
}
}
is (UInt("b111")) {
printf("[acq %d]: Get(addr_block = %x, addr_beat = %x, addr_byte = %x, op_size = %x)\n",
idx, addr_block, addr_beat, get_addr_byte, get_operand_size)
}
}
}
idx := idx + UInt(1)
}
}
class ComparatorClient(val target: Long)(implicit val p: Parameters) extends Module
with HasComparatorParameters
with HasTileLinkParameters
{
val io = new Bundle {
val in = Decoupled(new Acquire).flip
val tl = new ClientUncachedTileLinkIO()
val out = Decoupled(new Grant)
val finished = Bool(OUTPUT)
val timeout = Bool(OUTPUT)
}
val xacts = tlMaxClientXacts
val offset = (UInt(target) >> UInt(tlBeatAddrBits+tlByteAddrBits))
// Track the status of inflight requests
val issued = RegInit(Vec.fill(xacts) {Bool(false)})
val ready = RegInit(Vec.fill(xacts) {Bool(false)})
val result = Reg(Vec(xacts, new Grant))
val buffer = Queue(io.in, xacts)
val queue = Module(new Queue(io.tl.acquire.bits.client_xact_id, xacts))
val isMultiOut = buffer.bits.hasMultibeatData()
val isMultiIn = io.tl.grant.bits.hasMultibeatData()
val beatOut = RegInit(UInt(0, width = tlBeatAddrBits))
val lastBeat = UInt(tlDataBeats-1)
val isFirstBeatOut = Mux(isMultiOut, beatOut === UInt(0), Bool(true))
val isLastBeatOut = Mux(isMultiOut, beatOut === lastBeat, Bool(true))
val isLastBeatIn = Mux(isMultiIn, io.tl.grant.bits.addr_beat === lastBeat, Bool(true))
// Remove this once HoldUnless is in chisel3
def holdUnless[T <: Data](in : T, enable: Bool): T = Mux(!enable, RegEnable(in, enable), in)
// Potentially issue a request, using a free xact id
// NOTE: we may retract valid and change xact_id on a !ready (allowed by spec)
val allow_acq = NoiseMaker(1)(0) && issued.map(!_).reduce(_ || _)
val xact_id = holdUnless(PriorityEncoder(issued.map(!_)), isFirstBeatOut)
buffer.ready := allow_acq && io.tl.acquire.ready && isLastBeatOut
io.tl.acquire.valid := allow_acq && buffer.valid
io.tl.acquire.bits := buffer.bits
io.tl.acquire.bits.addr_block := buffer.bits.addr_block + offset
io.tl.acquire.bits.client_xact_id := xact_id
when (isMultiOut) {
val dataOut = (buffer.bits.data << beatOut) + buffer.bits.data // mix the data up a bit
io.tl.acquire.bits.addr_beat := beatOut
io.tl.acquire.bits.data := dataOut
}
when (io.tl.acquire.fire()) {
issued(xact_id) := isLastBeatOut
when (isMultiOut) { beatOut := beatOut + UInt(1) }
}
// Remember the xact ID so we can return results in-order
queue.io.enq.valid := io.tl.acquire.fire() && isLastBeatOut
queue.io.enq.bits := xact_id
assert (queue.io.enq.ready || !queue.io.enq.valid) // should be big enough
// Capture the results from the manager
io.tl.grant.ready := NoiseMaker(1)(0)
when (io.tl.grant.fire()) {
val id = io.tl.grant.bits.client_xact_id
assert (!ready(id)) // got same xact_id twice?
ready(id) := isLastBeatIn
result(id) := io.tl.grant.bits
}
// Bad xact_id returned if ready but not issued!
assert ((ready zip issued) map {case (r,i) => i || !r} reduce (_ && _))
// When we have the next grant result, send it to the sink
val next_id = queue.io.deq.bits
queue.io.deq.ready := io.out.ready && ready(next_id) // TODO: only compares last getBlock
io.out.valid := queue.io.deq.valid && ready(next_id)
io.out.bits := result(queue.io.deq.bits)
when (io.out.fire()) {
ready(next_id) := Bool(false)
issued(next_id) := Bool(false)
}
io.finished := !buffer.valid && !issued.reduce(_ || _)
val (idx, acq_done) = Counter(
io.tl.acquire.fire() && io.tl.acquire.bits.last(), nOperations)
debug(idx)
val timer = Module(new Timer(8192, xacts))
timer.io.start.valid := io.tl.acquire.fire() && io.tl.acquire.bits.first()
timer.io.start.bits := xact_id
timer.io.stop.valid := io.tl.grant.fire() && io.tl.grant.bits.first()
timer.io.stop.bits := io.tl.grant.bits.client_xact_id
assert(!timer.io.timeout.valid, "Comparator TL client timed out")
io.timeout := timer.io.timeout.valid
}
class ComparatorSink(implicit val p: Parameters) extends Module
with HasComparatorParameters
with HasTileLinkParameters
with HasGroundTestConstants
{
val io = new Bundle {
val in = Vec(nTargets, Decoupled(new Grant)).flip
val finished = Bool(OUTPUT)
val error = Valid(UInt(width = errorCodeBits))
}
// could use a smaller Queue here, but that would couple the targets' flow control together
val queues = io.in.map(Queue(_, nOperations))
io.finished := queues.map(!_.valid).reduce(_ && _)
val all_valid = queues.map(_.valid).reduce(_ && _)
queues.foreach(_.ready := all_valid)
val base = queues(0).bits
val idx = Reg(init = UInt(0, log2Up(nOperations)))
def check(g: Grant) = {
when (g.hasData() && base.data =/= g.data) {
printf("%d: %x =/= %x, g_type = %x\n", idx, base.data, g.data, g.g_type)
}
val assert_conds = Seq(
g.is_builtin_type,
base.g_type === g.g_type,
base.addr_beat === g.addr_beat || !g.hasData(),
base.data === g.data || !g.hasData())
assert (g.is_builtin_type, "grant not builtin")
assert (base.g_type === g.g_type, "g_type mismatch")
assert (base.addr_beat === g.addr_beat || !g.hasData(), "addr_beat mismatch")
assert (base.data === g.data || !g.hasData(), "data mismatch")
assert_conds.zipWithIndex.foreach { case (cond, i) =>
when (!cond) {
io.error.valid := Bool(true)
io.error.bits := UInt(i)
}
}
}
when (all_valid) {
when (base.hasData()) {
printf("[gnt %d]: g_type = %x, addr_beat = %x, data = %x\n",
idx, base.g_type, base.addr_beat, base.data)
} .otherwise {
printf("[gnt %d]: g_type = %x\n", idx, base.g_type)
}
queues.drop(1).map(_.bits).foreach(check)
idx := idx + UInt(1)
}
}
class ComparatorCore(implicit p: Parameters) extends GroundTest()(p)
with HasComparatorParameters
with HasTileLinkParameters {
require (io.mem.size == nTargets)
val source = Module(new ComparatorSource)
val sink = Module(new ComparatorSink)
val broadcast = Broadcaster(source.io.out, nTargets)
val clients = targets.zipWithIndex.map { case (target, index) =>
val client = Module(new ComparatorClient(target))
client.io.in <> broadcast(index)
io.mem(index) <> client.io.tl
sink.io.in(index) <> client.io.out
client
}
val client_timeouts = clients.map(_.io.timeout)
io.status.finished := source.io.finished && sink.io.finished && clients.map(_.io.finished).reduce(_ && _)
io.status.timeout.valid := client_timeouts.reduce(_ || _)
io.status.timeout.bits := MuxCase(UInt(0),
client_timeouts.zipWithIndex.map {
case (timeout, i) => (timeout -> UInt(i))
})
io.status.error := sink.io.error
}

@ -0,0 +1,212 @@
package groundtest
import Chisel._
import uncore.tilelink._
import uncore.devices.NTiles
import uncore.constants._
import junctions._
import rocket._
import scala.util.Random
import cde.{Parameters, Field}
case class GeneratorParameters(
maxRequests: Int,
startAddress: BigInt)
case object GeneratorKey extends Field[GeneratorParameters]
trait HasGeneratorParameters extends HasGroundTestParameters {
implicit val p: Parameters
val genParams = p(GeneratorKey)
val nGens = p(GroundTestKey).map(
cs => cs.uncached + cs.cached).reduce(_ + _)
val genTimeout = 8192
val maxRequests = genParams.maxRequests
val startAddress = genParams.startAddress
val genWordBits = 32
val genWordBytes = genWordBits / 8
val wordOffset = log2Ceil(genWordBytes)
val wordSize = UInt(log2Ceil(genWordBytes))
require(startAddress % BigInt(genWordBytes) == 0)
}
class UncachedTileLinkGenerator(id: Int)
(implicit p: Parameters) extends TLModule()(p) with HasGeneratorParameters {
private val tlBlockOffset = tlBeatAddrBits + tlByteAddrBits
val io = new Bundle {
val mem = new ClientUncachedTileLinkIO
val status = new GroundTestStatus
}
val (s_start :: s_put :: s_get :: s_finished :: Nil) = Enum(Bits(), 4)
val state = Reg(init = s_start)
val (req_cnt, req_wrap) = Counter(io.mem.grant.fire(), maxRequests)
val sending = Reg(init = Bool(false))
when (state === s_start) {
sending := Bool(true)
state := s_put
}
when (io.mem.acquire.fire()) { sending := Bool(false) }
when (io.mem.grant.fire()) { sending := Bool(true) }
when (req_wrap) { state := Mux(state === s_put, s_get, s_finished) }
val timeout = Timer(genTimeout, io.mem.acquire.fire(), io.mem.grant.fire())
assert(!timeout, s"Uncached generator ${id} timed out waiting for grant")
io.status.finished := (state === s_finished)
io.status.timeout.valid := timeout
io.status.timeout.bits := UInt(id)
val part_of_full_addr =
if (log2Ceil(nGens) > 0) {
Cat(UInt(id, log2Ceil(nGens)),
UInt(0, wordOffset))
} else {
UInt(0, wordOffset)
}
val full_addr = UInt(startAddress) + Cat(req_cnt, part_of_full_addr)
val addr_block = full_addr >> UInt(tlBlockOffset)
val addr_beat = full_addr(tlBlockOffset - 1, tlByteAddrBits)
val addr_byte = full_addr(tlByteAddrBits - 1, 0)
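  // Together these give each generator a disjoint, word-aligned address
  // stream: full_addr = startAddress + (req_cnt * nGens + id) * genWordBytes
  // (assuming nGens is a power of two). For example, with 2 generators and
  // 4-byte words, generator 1 touches startAddress + 0x4, + 0xc, + 0x14, ...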
val data_prefix = Cat(UInt(id, log2Up(nGens)), req_cnt)
val word_data = Wire(UInt(width = genWordBits))
word_data := Cat(data_prefix, part_of_full_addr)
val beat_data = Fill(tlDataBits / genWordBits, word_data)
val wshift = Cat(beatOffset(full_addr), UInt(0, wordOffset))
val wmask = Fill(genWordBits / 8, Bits(1, 1)) << wshift
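  // word_data is replicated across the full beat; wmask then enables only
  // the byte lanes of the addressed word within that beat.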
val put_acquire = Put(
client_xact_id = UInt(0),
addr_block = addr_block,
addr_beat = addr_beat,
data = beat_data,
wmask = Some(wmask),
alloc = Bool(false))
val get_acquire = Get(
client_xact_id = UInt(0),
addr_block = addr_block,
addr_beat = addr_beat,
addr_byte = addr_byte,
operand_size = wordSize,
alloc = Bool(false))
io.mem.acquire.valid := sending && !io.status.finished
io.mem.acquire.bits := Mux(state === s_put, put_acquire, get_acquire)
io.mem.grant.ready := !sending && !io.status.finished
def wordFromBeat(addr: UInt, dat: UInt) = {
val shift = Cat(beatOffset(addr), UInt(0, wordOffset + 3))
(dat >> shift)(genWordBits - 1, 0)
}
val data_mismatch = io.mem.grant.fire() && state === s_get &&
wordFromBeat(full_addr, io.mem.grant.bits.data) =/= word_data
io.status.error.valid := data_mismatch
io.status.error.bits := UInt(id)
assert(!data_mismatch,
s"Get received incorrect data in uncached generator ${id}")
def beatOffset(addr: UInt) = // TODO zero-width
if (tlByteAddrBits > wordOffset) addr(tlByteAddrBits - 1, wordOffset)
else UInt(0)
}
class HellaCacheGenerator(id: Int)
(implicit p: Parameters) extends L1HellaCacheModule()(p) with HasGeneratorParameters {
val io = new Bundle {
val mem = new HellaCacheIO
val status = new GroundTestStatus
}
val timeout = Timer(genTimeout, io.mem.req.fire(), io.mem.resp.valid)
assert(!timeout, s"Cached generator ${id} timed out waiting for response")
io.status.timeout.valid := timeout
io.status.timeout.bits := UInt(id)
val (s_start :: s_write :: s_read :: s_finished :: Nil) = Enum(Bits(), 4)
val state = Reg(init = s_start)
val sending = Reg(init = Bool(false))
val (req_cnt, req_wrap) = Counter(io.mem.resp.valid, maxRequests)
val part_of_req_addr =
if (log2Ceil(nGens) > 0) {
Cat(UInt(id, log2Ceil(nGens)),
UInt(0, wordOffset))
} else {
UInt(0, wordOffset)
}
val req_addr = UInt(startAddress) + Cat(req_cnt, part_of_req_addr)
val req_data = Cat(UInt(id, log2Up(nGens)), req_cnt, part_of_req_addr)
io.mem.req.valid := sending && !io.status.finished
io.mem.req.bits.addr := req_addr
io.mem.req.bits.data := req_data
io.mem.req.bits.typ := wordSize
io.mem.req.bits.cmd := Mux(state === s_write, M_XWR, M_XRD)
io.mem.req.bits.tag := UInt(0)
when (state === s_start) { sending := Bool(true); state := s_write }
when (io.mem.req.fire()) { sending := Bool(false) }
when (io.mem.resp.valid) { sending := Bool(true) }
when (req_wrap) { state := Mux(state === s_write, s_read, s_finished) }
io.status.finished := (state === s_finished)
def data_match(recv: Bits, expected: Bits): Bool = {
val recv_resized = Wire(Bits(width = genWordBits))
val exp_resized = Wire(Bits(width = genWordBits))
recv_resized := recv
exp_resized := expected
recv_resized === exp_resized
}
val data_mismatch = io.mem.resp.valid && io.mem.resp.bits.has_data &&
!data_match(io.mem.resp.bits.data, req_data)
io.status.error.valid := data_mismatch
io.status.error.bits := UInt(id)
assert(!data_mismatch,
s"Received incorrect data in cached generator ${id}")
}
class GeneratorTest(implicit p: Parameters)
extends GroundTest()(p) with HasGeneratorParameters {
val idStart = p(GroundTestKey).take(tileId)
.map(settings => settings.cached + settings.uncached)
.foldLeft(0)(_ + _)
val cached = List.tabulate(nCached) { i =>
val realId = idStart + i
Module(new HellaCacheGenerator(realId))
}
val uncached = List.tabulate(nUncached) { i =>
val realId = idStart + nCached + i
Module(new UncachedTileLinkGenerator(realId))
}
io.cache <> cached.map(_.io.mem)
io.mem <> uncached.map(_.io.mem)
val gen_debug = cached.map(_.io.status) ++ uncached.map(_.io.status)
io.status := DebugCombiner(gen_debug)
}

@ -0,0 +1,121 @@
package groundtest
import Chisel._
import uncore.tilelink._
import uncore.converters._
import junctions._
import cde.Parameters
class NastiGenerator(id: Int)(implicit val p: Parameters) extends Module
with HasNastiParameters
with HasMIFParameters
with HasAddrMapParameters
with HasGeneratorParameters {
val io = new Bundle {
val status = new GroundTestStatus
val mem = new NastiIO
}
val mifDataBytes = mifDataBits / 8
val (s_start :: s_write_addr :: s_write_data ::
s_read :: s_wait :: s_finish :: Nil) = Enum(Bits(), 6)
val state = Reg(init = s_start)
def ref_data(idx: UInt) = UInt(0x35abffcd, genWordBits) + (idx << UInt(3))
val part_of_addr =
if (log2Ceil(nGens) > 0) {
Cat(UInt(id, log2Ceil(nGens)),
UInt(0, wordOffset))
} else {
UInt(0, wordOffset)
}
val (write_idx, write_done) = Counter(io.mem.w.fire(), maxRequests)
val write_addr = UInt(startAddress) + Cat(write_idx, part_of_addr)
val write_data = Fill(mifDataBits / genWordBits, ref_data(write_idx))
val write_align = write_addr(log2Up(mifDataBytes) - 1, 0)
val write_mask = UInt((1 << genWordBytes) - 1, nastiWStrobeBits) << write_align
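  // Byte strobes: genWordBytes ones shifted to the addressed word's lanes
  // within the write beat.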
val (read_idx, read_done) = Counter(io.mem.ar.fire(), maxRequests)
val read_addr = UInt(startAddress) + Cat(read_idx, part_of_addr)
io.mem.aw.valid := (state === s_write_addr)
io.mem.aw.bits := NastiWriteAddressChannel(
id = write_idx(nastiXIdBits - 1, 0),
addr = write_addr,
len = UInt(0),
size = UInt(log2Ceil(genWordBytes)))
io.mem.w.valid := (state === s_write_data)
io.mem.w.bits := NastiWriteDataChannel(
data = write_data,
strb = Some(write_mask),
last = Bool(true))
io.mem.ar.valid := (state === s_read)
io.mem.ar.bits := NastiReadAddressChannel(
id = UInt(0),
addr = read_addr,
len = UInt(0),
size = UInt(log2Ceil(genWordBytes)))
io.mem.r.ready := Bool(true)
io.mem.b.ready := Bool(true)
io.status.finished := (state === s_finish)
val (read_resp_idx, read_resp_done) = Counter(io.mem.r.fire(), maxRequests)
val read_resp_addr = UInt(startAddress) + Cat(read_resp_idx, part_of_addr)
val read_offset = read_resp_addr(log2Up(nastiXDataBits / 8) - 1, 0)
val read_shift = Cat(read_offset, UInt(0, 3))
val read_data = (io.mem.r.bits.data >> read_shift)(genWordBits - 1, 0)
val data_mismatch = io.mem.r.valid && read_data =/= ref_data(read_resp_idx)
assert(!data_mismatch, "NASTI Test: results do not match")
io.status.error.valid := data_mismatch
io.status.error.bits := UInt(1)
when (state === s_start) { state := s_write_addr }
when (io.mem.aw.fire()) { state := s_write_data }
when (io.mem.w.fire()) { state := s_write_addr }
when (write_done) { state := s_read }
when (read_done) { state := s_wait }
when (read_resp_done) { state := s_finish }
val r_timer = Module(new Timer(1000, 2))
r_timer.io.start.valid := io.mem.ar.fire()
r_timer.io.start.bits := io.mem.ar.bits.id
r_timer.io.stop.valid := io.mem.r.fire() && io.mem.r.bits.last
r_timer.io.stop.bits := io.mem.r.bits.id
assert(!r_timer.io.timeout.valid, "NASTI Read timed out")
val w_timer = Module(new Timer(1000, 2))
w_timer.io.start.valid := io.mem.aw.fire()
w_timer.io.start.bits := io.mem.aw.bits.id
w_timer.io.stop.valid := io.mem.b.fire()
w_timer.io.stop.bits := io.mem.b.bits.id
assert(!w_timer.io.timeout.valid, "NASTI Write timed out")
io.status.timeout.valid := r_timer.io.timeout.valid || w_timer.io.timeout.valid
io.status.timeout.bits := Mux(r_timer.io.timeout.valid, UInt(1), UInt(2))
}
class NastiConverterTest(implicit p: Parameters) extends GroundTest()(p)
with HasNastiParameters {
require(tileSettings.uncached == 1 && tileSettings.cached == 0)
val genId = p(GroundTestKey).take(tileId)
.map(settings => settings.cached + settings.uncached)
.foldLeft(0)(_ + _)
val test = Module(new NastiGenerator(genId))
val converter = Module(new TileLinkIONastiIOConverter()(
p.alterPartial { case TLId => "Outermost" }))
converter.io.nasti <> test.io.mem
TileLinkWidthAdapter(io.mem.head, converter.io.tl)
io.status := test.io.status
}

@ -0,0 +1,776 @@
package groundtest
import Chisel._
import uncore.tilelink._
import uncore.constants._
import uncore.agents._
import uncore.util._
import junctions.{ParameterizedBundle, HasAddrMapParameters, Timer}
import rocket.HellaCacheIO
import cde.{Parameters, Field}
class RegressionIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
val start = Bool(INPUT)
val cache = new HellaCacheIO
val mem = new ClientUncachedTileLinkIO
val finished = Bool(OUTPUT)
val errored = Bool(OUTPUT)
}
abstract class Regression(implicit val p: Parameters)
extends Module with HasTileLinkParameters with HasAddrMapParameters {
val memStart = addrMap("mem").start
val memStartBlock = memStart >> p(CacheBlockOffsetBits)
val io = new RegressionIO
def disableCache() {
io.cache.req.valid := Bool(false)
io.cache.req.bits.addr := UInt(memStart)
io.cache.req.bits.typ := UInt(0)
io.cache.req.bits.cmd := M_XRD
io.cache.req.bits.tag := UInt(0)
io.cache.req.bits.data := Bits(0)
io.cache.req.bits.phys := Bool(true)
io.cache.invalidate_lr := Bool(false)
}
def disableMem() {
io.mem.acquire.valid := Bool(false)
io.mem.grant.ready := Bool(false)
}
}
/**
* This was a bug in which the TileLinkIONarrower logic screwed up
 * when a PutBlock request and a narrow Get request were sent to it at the
 * same time. Repeating that sequence enough times would fill up a queue
 * and deadlock the system.
*/
class IOGetAfterPutBlockRegression(implicit p: Parameters) extends Regression()(p) {
val nRuns = 7
val run = Reg(init = UInt(0, log2Up(nRuns + 1)))
val (put_beat, put_done) = Counter(
io.mem.acquire.fire() && io.mem.acquire.bits.hasData(), tlDataBeats)
val started = Reg(init = Bool(false))
val put_sent = Reg(init = Bool(false))
val get_sent = Reg(init = Bool(false))
val put_acked = Reg(init = Bool(false))
val get_acked = Reg(init = Bool(false))
val both_acked = put_acked && get_acked
when (!started && io.start) { started := Bool(true) }
io.mem.acquire.valid := !put_sent && started
io.mem.acquire.bits := PutBlock(
client_xact_id = UInt(0),
addr_block = UInt(memStartBlock),
addr_beat = put_beat,
data = UInt(0))
io.mem.grant.ready := Bool(true)
io.cache.req.valid := !get_sent && started
io.cache.req.bits.addr := UInt(addrMap("io:int:bootrom").start)
io.cache.req.bits.typ := UInt(log2Ceil(32 / 8))
io.cache.req.bits.cmd := M_XRD
io.cache.req.bits.tag := UInt(0)
io.cache.invalidate_lr := Bool(false)
when (put_done) { put_sent := Bool(true) }
when (io.cache.req.fire()) { get_sent := Bool(true) }
when (io.mem.grant.fire()) { put_acked := Bool(true) }
when (io.cache.resp.valid) { get_acked := Bool(true) }
when (both_acked) {
when (run < UInt(nRuns - 1)) {
put_sent := Bool(false)
get_sent := Bool(false)
}
put_acked := Bool(false)
get_acked := Bool(false)
run := run + UInt(1)
}
io.finished := (run === UInt(nRuns))
}
/* This was a bug with merging two PutBlocks to the same address in the L2.
* The transactor would start accepting beats of the second transaction but
* acknowledge both of them when the first one finished.
 * This left the transactor's state out of sync, since the next time around
 * it would start the put in the middle of the block. */
class PutBlockMergeRegression(implicit p: Parameters)
extends Regression()(p) with HasTileLinkParameters {
val s_idle :: s_put :: s_wait :: s_done :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_idle)
disableCache()
val l2params = p.alterPartial({ case CacheName => "L2Bank" })
val nSets = l2params(NSets)
val addr_blocks = Vec(Seq(0, 0, nSets).map(num => UInt(num + memStartBlock)))
val nSteps = addr_blocks.size
val (acq_beat, acq_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
val (send_cnt, send_done) = Counter(acq_done, nSteps)
val (ack_cnt, ack_done) = Counter(io.mem.grant.fire(), nSteps)
io.mem.acquire.valid := (state === s_put)
io.mem.acquire.bits := PutBlock(
client_xact_id = send_cnt,
addr_block = addr_blocks(send_cnt),
addr_beat = acq_beat,
data = Cat(send_cnt, acq_beat))
io.mem.grant.ready := Bool(true)
when (state === s_idle && io.start) { state := s_put }
when (send_done) { state := s_wait }
when (ack_done) { state := s_done }
io.finished := (state === s_done)
}
/* Make sure the L2 does "the right thing" when a put is sent no-alloc but
* the block is already in cache. It should just treat the request as a
* regular allocating put */
class NoAllocPutHitRegression(implicit p: Parameters) extends Regression()(p) {
val (s_idle :: s_prefetch :: s_put :: s_get ::
s_wait :: s_done :: Nil) = Enum(Bits(), 6)
val state = Reg(init = s_idle)
val acq = io.mem.acquire.bits
val gnt = io.mem.grant.bits
val (put_beat, put_done) = Counter(io.mem.acquire.fire() && acq.hasData(), tlDataBeats)
val acked = Reg(init = UInt(0, tlDataBeats + 2))
val addr_block = UInt(memStartBlock + 2)
val test_data = UInt(0x3446)
val prefetch_acq = GetPrefetch(
client_xact_id = UInt(0),
addr_block = addr_block)
val put_acq = PutBlock(
client_xact_id = UInt(1),
addr_block = addr_block,
addr_beat = put_beat,
data = test_data,
alloc = Bool(false))
val get_acq = GetBlock(
client_xact_id = UInt(2),
addr_block = addr_block)
io.mem.acquire.valid := state.isOneOf(s_prefetch, s_get, s_put)
io.mem.acquire.bits := MuxCase(get_acq, Seq(
(state === s_prefetch) -> prefetch_acq,
(state === s_put) -> put_acq))
io.mem.grant.ready := Bool(true)
when (state === s_idle && io.start) { state := s_prefetch }
when (state === s_prefetch && io.mem.acquire.ready) { state := s_put }
when (put_done) { state := s_get }
when (state === s_get && io.mem.acquire.ready) { state := s_wait }
when (state === s_wait && acked.andR) { state := s_done }
when (io.mem.grant.fire()) {
switch (gnt.client_xact_id) {
is (UInt(0)) { acked := acked | UInt(1 << tlDataBeats) }
is (UInt(1)) { acked := acked | UInt(1 << (tlDataBeats + 1)) }
is (UInt(2)) { acked := acked | UIntToOH(gnt.addr_beat) }
}
}
val data_mismatch = io.mem.grant.fire() && gnt.hasData() && gnt.data =/= test_data
assert(!data_mismatch, "NoAllocPutHitRegression: data does not match")
io.finished := (state === s_done)
io.errored := data_mismatch
disableCache()
}
/** Make sure L2 does the right thing when multiple puts are sent for the
* same block, but only the first one has the alloc bit set. */
class MixedAllocPutRegression(implicit p: Parameters) extends Regression()(p) {
val (s_idle :: s_pf_send :: s_pf_wait :: s_put_send :: s_put_wait ::
s_get_send :: s_get_wait :: s_done :: Nil) = Enum(Bits(), 8)
val state = Reg(init = s_idle)
/** We have to test two cases: one when the block is already cached
* and one when the block is not yet cached.
* We use prefetching to assure the first case. */
val test_data = Vec(
UInt("h2222222211111111"),
UInt("h3333333333333333"),
UInt("h4444444444444444"),
UInt("h5555555555555555"))
val test_alloc = Vec(Bool(false), Bool(false), Bool(true), Bool(false))
val test_block = Vec(
Seq.fill(2) { UInt(memStartBlock + 15) } ++
Seq.fill(2) { UInt(memStartBlock + 16) })
val test_beat = Vec(UInt(0), UInt(2), UInt(1), UInt(2))
val (put_acq_id, put_acq_done) = Counter(
state === s_put_send && io.mem.acquire.ready, test_data.size)
val (put_gnt_cnt, put_gnt_done) = Counter(
state === s_put_wait && io.mem.grant.valid, test_data.size)
val (get_acq_id, get_acq_done) = Counter(
state === s_get_send && io.mem.acquire.ready, test_data.size)
val (get_gnt_cnt, get_gnt_done) = Counter(
state === s_get_wait && io.mem.grant.valid, test_data.size)
val pf_acquire = PutPrefetch(
client_xact_id = UInt(0),
addr_block = UInt(memStartBlock + 15))
val put_acquire = Put(
client_xact_id = put_acq_id,
addr_block = test_block(put_acq_id),
addr_beat = test_beat(put_acq_id),
data = test_data(put_acq_id),
alloc = test_alloc(put_acq_id))
val get_acquire = Get(
client_xact_id = get_acq_id,
addr_block = test_block(get_acq_id),
addr_beat = test_beat(get_acq_id))
io.mem.acquire.valid := state.isOneOf(s_pf_send, s_put_send, s_get_send)
io.mem.acquire.bits := MuxLookup(state, pf_acquire, Seq(
s_put_send -> put_acquire,
s_get_send -> get_acquire))
io.mem.grant.ready := state.isOneOf(s_pf_wait, s_put_wait, s_get_wait)
when (state === s_idle && io.start) { state := s_pf_send }
when (state === s_pf_send && io.mem.acquire.ready) { state := s_pf_wait }
when (state === s_pf_wait && io.mem.grant.valid) { state := s_put_send }
when (put_acq_done) { state := s_put_wait }
when (put_gnt_done) { state := s_get_send }
when (get_acq_done) { state := s_get_wait }
when (get_gnt_done) { state := s_done }
io.finished := (state === s_done)
val data_mismatch = state === s_get_wait && io.mem.grant.fire() &&
io.mem.grant.bits.data =/= test_data(io.mem.grant.bits.client_xact_id)
assert(!data_mismatch, "MixedAllocPutRegression: data mismatch")
io.errored := data_mismatch
disableCache()
}
/* Make sure each no-alloc put triggers a request to outer memory.
* Unfortunately, there's no way to verify that this works except by looking
* at the waveform */
class RepeatedNoAllocPutRegression(implicit p: Parameters) extends Regression()(p) {
disableCache()
val nPuts = 2
val (put_beat, put_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
val (req_cnt, req_done) = Counter(put_done, nPuts)
val sending = Reg(init = Bool(false))
val acked = Reg(init = UInt(0, nPuts))
when (!sending && io.start) { sending := Bool(true) }
when (sending && req_done) { sending := Bool(false) }
io.mem.acquire.valid := sending
io.mem.acquire.bits := PutBlock(
client_xact_id = req_cnt,
addr_block = UInt(memStartBlock + 5),
addr_beat = put_beat,
data = Cat(req_cnt, UInt(0, 8)),
alloc = Bool(false))
io.mem.grant.ready := Bool(true)
when (io.mem.grant.fire()) {
acked := acked | UIntToOH(io.mem.grant.bits.client_xact_id)
}
io.finished := acked.andR
}
/* Make sure write masking works properly by writing a block of data
* piece by piece */
class WriteMaskedPutBlockRegression(implicit p: Parameters) extends Regression()(p) {
disableCache()
val (s_idle :: s_put_send :: s_put_ack :: s_stall ::
s_get_send :: s_get_ack :: s_done :: Nil) = Enum(Bits(), 7)
val state = Reg(init = s_idle)
val post_stall_state = Reg(init = s_idle)
val gnt = io.mem.grant.bits
val acq = io.mem.acquire.bits
val stage = Reg(init = UInt(0, 1))
val (put_beat, put_block_done) = Counter(
io.mem.acquire.fire() && acq.hasData(), tlDataBeats)
val put_data = UInt(0x30010040, tlDataBits) + (put_beat << UInt(2))
val put_acq = PutBlock(
client_xact_id = UInt(0),
addr_block = UInt(memStartBlock + 7),
addr_beat = put_beat,
data = Mux(put_beat(0) === stage, put_data, UInt(0)),
wmask = Some(Mux(put_beat(0) === stage, Acquire.fullWriteMask, Bits(0))))
val get_acq = GetBlock(
client_xact_id = UInt(0),
addr_block = UInt(memStartBlock + 6) + stage)
io.mem.acquire.valid := state.isOneOf(s_put_send, s_get_send)
io.mem.acquire.bits := Mux(state === s_get_send, get_acq, put_acq)
io.mem.grant.ready := state.isOneOf(s_put_ack, s_get_ack)
val (get_cnt, get_done) = Counter(
io.mem.grant.fire() && gnt.hasData(), tlDataBeats)
val get_data = UInt(0x30010040, tlDataBits) + (get_cnt << UInt(2))
val (stall_cnt, stall_done) = Counter(state === s_stall, 16)
when (state === s_idle && io.start) { state := s_put_send }
when (put_block_done) { state := s_put_ack }
when (state === s_put_ack && io.mem.grant.valid) {
post_stall_state := s_get_send
state := s_stall
}
when (stall_done) { state := post_stall_state }
when (state === s_get_send && io.mem.acquire.ready) { state := s_get_ack }
when (get_done) {
// do a read in-between the two put-blocks to overwrite the data buffer
when (stage === UInt(0)) {
stage := stage + UInt(1)
post_stall_state := s_put_send
state := s_stall
} .otherwise { state := s_done }
}
io.finished := (state === s_done)
val data_mismatch = io.mem.grant.fire() && io.mem.grant.bits.hasData() &&
stage =/= UInt(0) && io.mem.grant.bits.data =/= get_data
assert(!data_mismatch, "WriteMaskedPutBlockRegression: data does not match")
io.errored := data_mismatch
}
/* Make sure a prefetch that hits returns immediately. */
class PrefetchHitRegression(implicit p: Parameters) extends Regression()(p) {
disableCache()
val sending = Reg(init = Bool(false))
val nPrefetches = 2
val (pf_cnt, pf_done) = Counter(io.mem.acquire.fire(), nPrefetches)
val acked = Reg(init = UInt(0, nPrefetches))
val acq_bits = Vec(
PutPrefetch(client_xact_id = UInt(0), addr_block = UInt(memStartBlock + 12)),
GetPrefetch(client_xact_id = UInt(1), addr_block = UInt(memStartBlock + 12)))
io.mem.acquire.valid := sending
io.mem.acquire.bits := acq_bits(pf_cnt)
io.mem.grant.ready := Bool(true)
when (io.mem.grant.fire()) {
acked := acked | UIntToOH(io.mem.grant.bits.client_xact_id)
}
when (!sending && io.start) { sending := Bool(true) }
when (sending && pf_done) { sending := Bool(false) }
io.finished := acked.andR
io.errored := Bool(false)
}
/* This tests the sort of access pattern that Hwacha uses.
* Instead of using PutBlock/GetBlock, it uses word-sized puts and gets
* to the same block.
* Each request has the same client_xact_id, but there are multiple in flight.
* The responses therefore must come back in the order they are sent. */
class SequentialSameIdGetRegression(implicit p: Parameters) extends Regression()(p) {
disableCache()
val sending = Reg(init = Bool(false))
val finished = Reg(init = Bool(false))
val (send_cnt, send_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
val (recv_cnt, recv_done) = Counter(io.mem.grant.fire(), tlDataBeats)
when (!sending && io.start) { sending := Bool(true) }
when (send_done) { sending := Bool(false) }
when (recv_done) { finished := Bool(true) }
io.mem.acquire.valid := sending
io.mem.acquire.bits := Get(
client_xact_id = UInt(0),
addr_block = UInt(memStartBlock + 9),
addr_beat = send_cnt)
io.mem.grant.ready := !finished
io.finished := finished
val beat_mismatch = io.mem.grant.fire() && io.mem.grant.bits.addr_beat =/= recv_cnt
assert(!beat_mismatch, "SequentialSameIdGetRegression: grant received out of order")
io.errored := beat_mismatch
}
/* Test that a writeback will occur by writing nWays + 1 blocks to the same
* set. This assumes that there is only a single cache bank. If we want to
* test multibank configurations, we'll have to think of some other way to
* determine which banks are conflicting */
class WritebackRegression(implicit p: Parameters) extends Regression()(p) {
disableCache()
val l2params = p.alterPartial({ case CacheName => "L2Bank" })
val nSets = l2params(NSets)
val nWays = l2params(NWays)
val addr_blocks = Vec.tabulate(nWays + 1) { i => UInt(memStartBlock + i * nSets) }
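  // Blocks spaced nSets apart map to the same cache set, so nWays + 1 of
  // them cannot all be resident at once and a writeback must occur.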
val data = Vec.tabulate(nWays + 1) { i => UInt((i + 1) * 1423) }
val (put_beat, put_done) = Counter(
io.mem.acquire.fire() && io.mem.acquire.bits.hasData(), tlDataBeats)
val (get_beat, get_done) = Counter(
io.mem.grant.fire() && io.mem.grant.bits.hasData(), tlDataBeats)
val (put_cnt, _) = Counter(put_done, nWays + 1)
val (get_cnt, _) = Counter(
io.mem.acquire.fire() && !io.mem.acquire.bits.hasData(), nWays + 1)
val (ack_cnt, ack_done) = Counter(
io.mem.grant.fire() && !io.mem.grant.bits.hasData() || get_done, nWays + 1)
val s_idle :: s_put :: s_get :: s_done :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_idle)
val sending = Reg(init = Bool(false))
io.mem.acquire.valid := sending
io.mem.acquire.bits := Mux(state === s_put,
PutBlock(
client_xact_id = UInt(0),
addr_block = addr_blocks(put_cnt),
addr_beat = put_beat,
data = data(put_cnt)),
GetBlock(
client_xact_id = UInt(0),
addr_block = addr_blocks(get_cnt)))
io.mem.grant.ready := !sending
when (state === s_idle && io.start) { state := s_put; sending := Bool(true) }
when (put_done || state === s_get && io.mem.acquire.fire()) {
sending := Bool(false)
}
when (get_done && !ack_done || state === s_put && io.mem.grant.fire()) {
sending := Bool(true)
}
when (ack_done) { state := Mux(state === s_put, s_get, s_done) }
io.finished := (state === s_done)
val data_mismatch = io.mem.grant.fire() && io.mem.grant.bits.hasData() &&
io.mem.grant.bits.data =/= data(ack_cnt)
assert(!data_mismatch, "WritebackRegression: incorrect data")
io.errored := data_mismatch
}
class ReleaseRegression(implicit p: Parameters) extends Regression()(p) {
disableMem()
val l1params = p.alterPartial({ case CacheName => "L1D" })
val nSets = l1params(NSets)
val nWays = l1params(NWays)
val blockOffset = l1params(CacheBlockOffsetBits)
val startBlock = memStartBlock + 10
val addr_blocks = Vec.tabulate(nWays + 1) { i => UInt(startBlock + i * nSets) }
val data = Vec.tabulate(nWays + 1) { i => UInt((i + 1) * 1522) }
val (req_idx, req_done) = Counter(io.cache.req.fire(), nWays + 1)
val (resp_idx, resp_done) = Counter(io.cache.resp.valid, nWays + 1)
val sending = Reg(init = Bool(false))
val s_idle :: s_write :: s_read :: s_done :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_idle)
io.cache.req.valid := sending && state.isOneOf(s_write, s_read)
io.cache.req.bits.addr := Cat(addr_blocks(req_idx), UInt(0, blockOffset))
io.cache.req.bits.typ := UInt(log2Ceil(64 / 8))
io.cache.req.bits.cmd := Mux(state === s_write, M_XWR, M_XRD)
io.cache.req.bits.tag := UInt(0)
io.cache.req.bits.data := data(req_idx)
io.cache.req.bits.phys := Bool(true)
io.cache.invalidate_lr := Bool(false)
when (state === s_idle && io.start) {
sending := Bool(true)
state := s_write
}
when (resp_done) { state := Mux(state === s_write, s_read, s_done) }
when (io.cache.req.fire()) { sending := Bool(false) }
when (io.cache.resp.valid) { sending := Bool(true) }
io.finished := (state === s_done)
val data_mismatch = io.cache.resp.valid && io.cache.resp.bits.has_data &&
io.cache.resp.bits.data =/= data(resp_idx)
assert(!data_mismatch, "ReleaseRegression: data mismatch")
io.errored := data_mismatch
}
class PutBeforePutBlockRegression(implicit p: Parameters) extends Regression()(p) {
val (s_idle :: s_put :: s_putblock :: s_wait ::
s_finished :: Nil) = Enum(Bits(), 5)
val state = Reg(init = s_idle)
disableCache()
val (put_block_beat, put_block_done) = Counter(
state === s_putblock && io.mem.acquire.ready, tlDataBeats)
val put_acquire = Put(
client_xact_id = UInt(0),
addr_block = UInt(memStartBlock),
addr_beat = UInt(0),
data = UInt(0),
wmask = Some(UInt((1 << 8) - 1)))
val put_block_acquire = PutBlock(
client_xact_id = UInt(1),
addr_block = UInt(memStartBlock + 1),
addr_beat = put_block_beat,
data = UInt(0))
val put_acked = Reg(init = UInt(0, 2))
val (ack_cnt, all_acked) = Counter(io.mem.grant.fire(), 2)
io.mem.acquire.valid := state.isOneOf(s_put, s_putblock)
io.mem.acquire.bits := Mux(state === s_put, put_acquire, put_block_acquire)
io.mem.grant.ready := (state === s_wait)
when (state === s_idle && io.start) { state := s_put }
when (state === s_put && io.mem.acquire.ready) { state := s_putblock }
when (put_block_done) { state := s_wait }
when (all_acked) { state := s_finished }
io.finished := (state === s_finished)
io.errored := Bool(false)
}
/**
* Make sure that multiple gets to the same line and beat are merged
* correctly, even if it is a cache miss.
*/
class MergedGetRegression(implicit p: Parameters) extends Regression()(p) {
disableCache()
val l2params = p.alterPartial({ case CacheName => "L2Bank" })
val nSets = l2params(NSets)
val nWays = l2params(NWays)
val (s_idle :: s_put :: s_get :: s_done :: Nil) = Enum(Bits(), 4)
val state = Reg(init = s_idle)
// Write NWays + 1 different conflicting lines to force an eviction of the first line
val (put_acq_cnt, put_acq_done) = Counter(state === s_put && io.mem.acquire.fire(), nWays + 1)
val (put_gnt_cnt, put_gnt_done) = Counter(state === s_put && io.mem.grant.fire(), nWays + 1)
val put_addr = UInt(memStartBlock) + Cat(put_acq_cnt, UInt(0, log2Up(nSets)))
val (get_acq_cnt, get_acq_done) = Counter(state === s_get && io.mem.acquire.fire(), 2)
val (get_gnt_cnt, get_gnt_done) = Counter(state === s_get && io.mem.grant.fire(), 2)
val sending = Reg(init = Bool(false))
when (state === s_idle && io.start) { state := s_put; sending := Bool(true) }
when (state === s_put) {
when (io.mem.acquire.fire()) { sending := Bool(false) }
when (io.mem.grant.fire()) { sending := Bool(true) }
when (put_gnt_done) { state := s_get }
}
when (state === s_get) {
when (get_acq_done) { sending := Bool(false) }
when (get_gnt_done) { state := s_done }
}
io.mem.acquire.valid := sending
io.mem.acquire.bits := Mux(state === s_put,
Put(
client_xact_id = UInt(0),
addr_block = put_addr,
addr_beat = UInt(3),
data = UInt("hdabb9321")),
Get(
client_xact_id = get_acq_cnt,
addr_block = UInt(memStartBlock),
addr_beat = UInt(3)))
io.mem.grant.ready := !sending
val data_mismatch = io.mem.grant.valid && io.mem.grant.bits.hasData() &&
io.mem.grant.bits.data =/= UInt("hdabb9321")
assert(!data_mismatch, "RepeatedGetRegression: wrong data back")
io.finished := state === s_done
io.errored := data_mismatch
}
/**
* Make sure that multiple puts to the same line and beat are merged
* correctly, even if there is a release from the L1
*/
class MergedPutRegression(implicit p: Parameters) extends Regression()(p)
with HasTileLinkParameters {
val (s_idle :: s_cache_req :: s_cache_wait ::
s_put :: s_get :: s_done :: Nil) = Enum(Bits(), 6)
val state = Reg(init = s_idle)
io.cache.req.valid := (state === s_cache_req)
io.cache.req.bits.cmd := M_XWR
io.cache.req.bits.typ := UInt(log2Ceil(64 / 8))
io.cache.req.bits.addr := UInt(memStart)
io.cache.req.bits.data := UInt(1)
io.cache.req.bits.tag := UInt(0)
val sending = Reg(init = Bool(false))
val delaying = Reg(init = Bool(false))
val (put_cnt, put_done) = Counter(io.mem.acquire.fire(), tlMaxClientXacts)
val (delay_cnt, delay_done) = Counter(delaying, 8)
val put_acked = Reg(UInt(width = 3), init = UInt(0))
io.mem.acquire.valid := sending && !delaying
io.mem.acquire.bits := Mux(state === s_put,
Put(
client_xact_id = put_cnt,
addr_block = UInt(memStartBlock),
addr_beat = UInt(0),
data = put_cnt + UInt(2)),
Get(
client_xact_id = UInt(0),
addr_block = UInt(memStartBlock),
addr_beat = UInt(0)))
io.mem.grant.ready := Bool(true)
when (state === s_idle && io.start) { state := s_cache_req }
when (io.cache.req.fire()) { state := s_cache_wait }
when (io.cache.resp.valid) { state := s_put; sending := Bool(true) }
when (io.mem.acquire.fire()) {
delaying := Bool(true)
when (put_done || state === s_get) { sending := Bool(false) }
}
when (delay_done) { delaying := Bool(false) }
when (io.mem.grant.fire()) {
when (state === s_put) {
put_acked := put_acked | UIntToOH(io.mem.grant.bits.client_xact_id)
}
when (state === s_get) { state := s_done }
}
when (state === s_put && put_acked.andR) {
state := s_get
sending := Bool(true)
}
val expected_data = UInt(2 + tlMaxClientXacts - 1)
val data_mismatch = io.mem.grant.valid && io.mem.grant.bits.hasData() &&
io.mem.grant.bits.data =/= expected_data
assert(!data_mismatch, "MergedPutRegression: data mismatch")
io.finished := (state === s_done)
io.errored := data_mismatch
}
object RegressionTests {
def cacheRegressions(implicit p: Parameters) = Seq(
Module(new PutBlockMergeRegression),
Module(new NoAllocPutHitRegression),
Module(new RepeatedNoAllocPutRegression),
Module(new WriteMaskedPutBlockRegression),
Module(new PrefetchHitRegression),
Module(new SequentialSameIdGetRegression),
Module(new WritebackRegression),
Module(new PutBeforePutBlockRegression),
Module(new MixedAllocPutRegression),
Module(new ReleaseRegression),
Module(new MergedGetRegression),
Module(new MergedPutRegression))
def broadcastRegressions(implicit p: Parameters) = Seq(
Module(new IOGetAfterPutBlockRegression),
Module(new WriteMaskedPutBlockRegression),
Module(new PutBeforePutBlockRegression),
Module(new ReleaseRegression))
}
case object GroundTestRegressions extends Field[Parameters => Seq[Regression]]
class RegressionTest(implicit p: Parameters) extends GroundTest()(p) {
val regressions = p(GroundTestRegressions)(p)
val regress_idx = Reg(init = UInt(0, log2Up(regressions.size + 1)))
val cur_finished = Wire(init = Bool(false))
val all_done = (regress_idx === UInt(regressions.size))
val start = Reg(init = Bool(true))
// default output values
io.mem.head.acquire.valid := Bool(false)
io.mem.head.acquire.bits := GetBlock(
client_xact_id = UInt(0),
addr_block = UInt(0))
io.mem.head.grant.ready := Bool(false)
io.cache.head.req.valid := Bool(false)
io.cache.head.req.bits.addr := UInt(0)
io.cache.head.req.bits.typ := UInt(log2Ceil(64 / 8))
io.cache.head.req.bits.cmd := M_XRD
io.cache.head.req.bits.tag := UInt(0)
io.cache.head.req.bits.phys := Bool(true)
io.cache.head.req.bits.data := UInt(0)
io.cache.head.invalidate_lr := Bool(false)
regressions.zipWithIndex.foreach { case (regress, i) =>
val me = regress_idx === UInt(i)
regress.io.start := me && start
regress.io.mem.acquire.ready := io.mem.head.acquire.ready && me
regress.io.mem.grant.valid := io.mem.head.grant.valid && me
regress.io.mem.grant.bits := io.mem.head.grant.bits
regress.io.cache.req.ready := io.cache.head.req.ready && me
regress.io.cache.resp.valid := io.cache.head.resp.valid && me
regress.io.cache.resp.bits := io.cache.head.resp.bits
when (me) {
io.mem.head.acquire.valid := regress.io.mem.acquire.valid
io.mem.head.acquire.bits := regress.io.mem.acquire.bits
io.mem.head.grant.ready := regress.io.mem.grant.ready
io.cache.head.req.valid := regress.io.cache.req.valid
io.cache.head.req.bits := regress.io.cache.req.bits
io.cache.head.invalidate_lr := regress.io.cache.invalidate_lr
io.status.error.valid := regress.io.errored
io.status.error.bits := UInt(i)
cur_finished := regress.io.finished
}
when (regress.io.start) {
printf(s"Starting regression ${regress.getClass.getSimpleName}\n")
}
}
when (cur_finished && !all_done) {
start := Bool(true)
regress_idx := regress_idx + UInt(1)
}
when (start) { start := Bool(false) }
val timeout = Timer(5000, start, cur_finished)
assert(!timeout, "Regression timed out")
io.status.finished := all_done
io.status.timeout.valid := timeout
io.status.timeout.bits := UInt(0)
assert(!(all_done && io.mem.head.grant.valid),
"Getting grant after test completion")
when (all_done) {
io.status.error.valid := io.mem.head.grant.valid
io.status.error.bits := UInt(regressions.size)
}
}

@ -0,0 +1,139 @@
package groundtest
import Chisel._
import rocket._
import uncore.tilelink._
import junctions._
import scala.util.Random
import scala.collection.mutable.ListBuffer
import cde.{Parameters, Field}
case object BuildGroundTest extends Field[Parameters => GroundTest]
case class GroundTestTileSettings(
uncached: Int = 0, cached: Int = 0, ptw: Int = 0, maxXacts: Int = 1)
case object GroundTestKey extends Field[Seq[GroundTestTileSettings]]
case object GroundTestId extends Field[Int]
trait HasGroundTestConstants {
val timeoutCodeBits = 4
val errorCodeBits = 4
}
trait HasGroundTestParameters extends HasAddrMapParameters {
implicit val p: Parameters
val tileId = p(GroundTestId)
val tileSettings = p(GroundTestKey)(tileId)
val nUncached = tileSettings.uncached
val nCached = tileSettings.cached
val nPTW = tileSettings.ptw
val memStart = addrMap("mem").start
val memStartBlock = memStart >> p(CacheBlockOffsetBits)
}
class DummyPTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
val io = new Bundle {
val requestors = Vec(n, new TLBPTWIO).flip
}
val req_arb = Module(new RRArbiter(new PTWReq, n))
req_arb.io.in <> io.requestors.map(_.req)
req_arb.io.out.ready := Bool(true)
def vpn_to_ppn(vpn: UInt): UInt = vpn(ppnBits - 1, 0)
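  // Identity mapping: the PPN is simply the low-order bits of the VPN.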
class QueueChannel extends ParameterizedBundle()(p) {
val ppn = UInt(width = ppnBits)
val chosen = UInt(width = log2Up(n))
}
val s1_ppn = vpn_to_ppn(req_arb.io.out.bits.addr)
val s2_ppn = RegEnable(s1_ppn, req_arb.io.out.valid)
val s2_chosen = RegEnable(req_arb.io.chosen, req_arb.io.out.valid)
val s2_valid = Reg(next = req_arb.io.out.valid)
val s2_resp = Wire(new PTWResp)
s2_resp.pte.ppn := s2_ppn
s2_resp.pte.reserved_for_software := UInt(0)
s2_resp.pte.d := Bool(true)
s2_resp.pte.a := Bool(false)
s2_resp.pte.g := Bool(false)
s2_resp.pte.u := Bool(true)
s2_resp.pte.r := Bool(true)
s2_resp.pte.w := Bool(true)
s2_resp.pte.x := Bool(false)
s2_resp.pte.v := Bool(true)
io.requestors.zipWithIndex.foreach { case (requestor, i) =>
requestor.resp.valid := s2_valid && s2_chosen === UInt(i)
requestor.resp.bits := s2_resp
requestor.status.vm := UInt("b01000")
requestor.status.prv := UInt(PRV.S)
requestor.invalidate := Bool(false)
}
}
class GroundTestStatus extends Bundle with HasGroundTestConstants {
val finished = Bool(OUTPUT)
val timeout = Valid(UInt(width = timeoutCodeBits))
val error = Valid(UInt(width = errorCodeBits))
}
class GroundTestIO(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasGroundTestParameters {
val cache = Vec(nCached, new HellaCacheIO)
val mem = Vec(nUncached, new ClientUncachedTileLinkIO)
val ptw = Vec(nPTW, new TLBPTWIO)
val status = new GroundTestStatus
}
abstract class GroundTest(implicit val p: Parameters) extends Module
with HasGroundTestParameters {
val io = new GroundTestIO
}
class GroundTestTile(resetSignal: Bool)
(implicit val p: Parameters)
extends Tile(resetSignal = resetSignal)(p)
with HasGroundTestParameters {
override val io = new TileIO {
val success = Bool(OUTPUT)
}
val test = p(BuildGroundTest)(dcacheParams)
val ptwPorts = ListBuffer.empty ++= test.io.ptw
val memPorts = ListBuffer.empty ++= test.io.mem
if (nCached > 0) {
val dcache_io = HellaCache(p(DCacheKey))(dcacheParams)
val dcacheArb = Module(new HellaCacheArbiter(nCached)(dcacheParams))
dcacheArb.io.requestor.zip(test.io.cache).foreach {
case (requestor, cache) =>
val dcacheIF = Module(new SimpleHellaCacheIF()(dcacheParams))
dcacheIF.io.requestor <> cache
requestor <> dcacheIF.io.cache
}
dcache_io.cpu <> dcacheArb.io.mem
io.cached.head <> dcache_io.mem
// SimpleHellaCacheIF leaves invalidate_lr dangling, so we wire it to false
dcache_io.cpu.invalidate_lr := Bool(false)
ptwPorts += dcache_io.ptw
}
if (ptwPorts.size > 0) {
val ptw = Module(new DummyPTW(ptwPorts.size))
ptw.io.requestors <> ptwPorts
}
require(memPorts.size == io.uncached.size)
if (memPorts.size > 0) {
io.uncached <> memPorts
}
io.success := test.io.status.finished
}

@ -0,0 +1,629 @@
// This file was originally written by Matthew Naylor, University of
// Cambridge, based on code already present in the groundtest repo.
//
// This software was partly developed by the University of Cambridge
// Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
// ("CTSRD"), as part of the DARPA CRASH research programme.
//
// This software was partly developed by the University of Cambridge
// Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
// ("MRC2"), as part of the DARPA MRC research programme.
//
// This software was partly developed by the University of Cambridge
// Computer Laboratory as part of the Rigorous Engineering of
// Mainstream Systems (REMS) project, funded by EPSRC grant
// EP/K008528/1.
package groundtest
import Chisel._
import uncore.tilelink._
import uncore.constants._
import uncore.devices.NTiles
import junctions._
import rocket._
import scala.util.Random
import cde.{Parameters, Field}
// =======
// Outline
// =======
// Generate memory traces that result from random sequences of memory
// operations. These traces can then be validated by an external
// tool. A trace is simply a sequence of memory requests and
// responses.
// ==========================
// Trace-generator parameters
// ==========================
// Compile-time parameters:
//
// * The id of the generator (there may be more than one in a
// multi-core system).
//
// * The total number of generators present in the system.
//
// * The desired number of requests to be sent by each generator.
//
// * A bag of physical addresses, shared by all cores, from which an
// address can be drawn when generating a fresh request.
//
// * A number of random 'extra addresses', local to each core, from
// which an address can be drawn when generating a fresh request.
// (This is a way to generate a wider range of addresses without having
// to repeatedly recompile with a different address bag.)
case object AddressBag extends Field[List[BigInt]]
trait HasTraceGenParams {
implicit val p: Parameters
val numGens = p(NTiles)
val numBitsInId = log2Up(numGens)
val numReqsPerGen = p(GeneratorKey).maxRequests
val memRespTimeout = 8192
val numBitsInWord = p(XLen)
val numBytesInWord = numBitsInWord / 8
val numBitsInWordOffset = log2Up(numBytesInWord)
val addressBag = p(AddressBag)
val addressBagLen = addressBag.length
val logAddressBagLen = log2Up(addressBagLen)
val genExtraAddrs = false
val logNumExtraAddrs = 1
val numExtraAddrs = 1 << logNumExtraAddrs
val maxTags = 8
require(numBytesInWord * 8 == numBitsInWord)
require((1 << logAddressBagLen) == addressBagLen)
}
// ============
// Trace format
// ============
// Let <id> denote a generator id;
// <addr> denote an address (in hex);
// <data> denote a value that is stored at an address;
// <tag> denote a unique request/response id;
// and <time> denote an integer representing a cycle-count.
// Each line in the trace takes one of the following formats.
//
// <id>: load-req <addr> #<tag> @<time>
// <id>: load-reserve-req <addr> #<tag> @<time>
// <id>: store-req <data> <addr> #<tag> @<time>
// <id>: store-cond-req <data> <addr> #<tag> @<time>
// <id>: swap-req <data> <addr> #<tag> @<time>
// <id>: resp <data> #<tag> @<time>
// <id>: fence-req @<time>
// <id>: fence-resp @<time>
// NOTE: The (address, value) pair of every generated store is unique,
// i.e. the same value is never written to the same address twice.
// This aids trace validation.
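// For example (illustrative values only), a two-core run might contain:
//
//   0: store-req 42 0x100008 #3 @422
//   1: load-req 0x100008 #1 @431
//   0: resp 42 #3 @437
//   1: resp 42 #1 @449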
// ============
// Random seeds
// ============
// The generator employs "uninitialised registers" to seed its PRNGs;
// these are randomly initialised by the C++ backend. This means that
// the "-s" command-line argument to the Rocket emulator can be used
// to generate new traces, or to replay specific ones.
// ===========
// Tag manager
// ===========
// This is used to obtain unique tags for memory requests: each
// request must carry a unique tag since responses can come back
// out-of-order.
//
// The tag manager can be viewed as a set of tags. The user can take
// a tag out of the set (if there is one available) and later put it
// back.
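// A minimal usage sketch (the surrounding signal names are illustrative):
//
//   val tagMan = Module(new TagMan(log2Ceil(maxTags)))
//   tagMan.io.take := wantToSend && tagMan.io.available
//   req.tag := tagMan.io.tagOut
//   tagMan.io.put := resp.valid
//   tagMan.io.tagIn := resp.bits.tag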
class TagMan(val logNumTags : Int) extends Module {
val io = new Bundle {
// Is there a tag available?
val available = Bool(OUTPUT)
// If so, which one?
val tagOut = UInt(OUTPUT, logNumTags)
// User pulses this to take the currently available tag
val take = Bool(INPUT)
// User pulses this to put a tag back
val put = Bool(INPUT)
// And the tag put back is
val tagIn = UInt(INPUT, logNumTags)
}
// Total number of tags available
val numTags = 1 << logNumTags
// For each tag, record whether or not it is in use
val inUse = List.fill(numTags)(Reg(init = Bool(false)))
// Mapping from each tag to its 'inUse' bit
val inUseMap = (0 to numTags-1).map(i => UInt(i)).zip(inUse)
// Next tag to offer
val nextTag = Reg(init = UInt(0, logNumTags))
io.tagOut := nextTag
// Is the next tag available?
io.available := ~MuxLookup(nextTag, Bool(true), inUseMap)
// When user takes a tag
when (io.take) {
for ((i, b) <- inUseMap) {
when (i === nextTag) { b := Bool(true) }
}
nextTag := nextTag + UInt(1)
}
// When user puts a tag back
when (io.put) {
for ((i, b) <- inUseMap) {
when (i === io.tagIn) { b := Bool(false) }
}
}
}
// ===============
// Trace generator
// ===============
class TraceGenerator(id: Int)
(implicit p: Parameters) extends L1HellaCacheModule()(p)
with HasTraceGenParams {
val io = new Bundle {
val finished = Bool(OUTPUT)
val timeout = Bool(OUTPUT)
val mem = new HellaCacheIO
}
val reqTimer = Module(new Timer(8192, maxTags))
reqTimer.io.start.valid := io.mem.req.fire()
reqTimer.io.start.bits := io.mem.req.bits.tag
reqTimer.io.stop.valid := io.mem.resp.valid
reqTimer.io.stop.bits := io.mem.resp.bits.tag
assert(!reqTimer.io.timeout.valid, s"TraceGen core ${id}: request timed out")
// Random addresses
// ----------------
// Address bag, shared by all cores, taken from module parameters.
// In addition, there is a per-core random selection of extra addresses.
val addrHashMap = p(GlobalAddrMap)
val baseAddr = addrHashMap("mem").start + 0x01000000
val bagOfAddrs = addressBag.map(x => UInt(x, numBitsInWord))
val extraAddrs = (0 to numExtraAddrs-1).
map(i => Reg(UInt(width = 16)))
// A random index into the address bag.
val randAddrBagIndex = LCG(logAddressBagLen)
// A random address from the address bag.
val addrBagIndices = (0 to addressBagLen-1).
map(i => UInt(i, logAddressBagLen))
val randAddrFromBag = MuxLookup(randAddrBagIndex, UInt(0),
addrBagIndices.zip(bagOfAddrs))
// Random address from the address bag or the extra addresses.
val randAddr =
if (! genExtraAddrs) {
randAddrFromBag
}
else {
// A random index into the extra addresses.
val randExtraAddrIndex = LCG(logNumExtraAddrs)
// A random address from the extra addresses.
val extraAddrIndices = (0 to numExtraAddrs-1).
map(i => UInt(i, logNumExtraAddrs))
val randAddrFromExtra = Cat(UInt(0),
MuxLookup(randExtraAddrIndex, UInt(0),
extraAddrIndices.zip(extraAddrs)), UInt(0, 3))
Frequency(List(
(1, randAddrFromBag),
(1, randAddrFromExtra)))
}
// Random opcodes
// --------------
// Generate random opcodes for memory operations according to the
// given frequency distribution.
// Opcodes
val (opNop :: opLoad :: opStore ::
opFence :: opLRSC :: opSwap ::
opDelay :: Nil) = Enum(Bits(), 7)
// Distribution specified as a list of (frequency,value) pairs.
// NOTE: frequencies must sum to a power of two.
val randOp = Frequency(List(
(10, opLoad),
(10, opStore),
(4, opFence),
(3, opLRSC),
(3, opSwap),
(2, opDelay)))
// Request/response tags
// ---------------------
// Responses may come back out-of-order. Each request and response
// therefore contains a unique 7-bit identifier, referred to as a
// "tag", used to match each response with its corresponding request.
// Create a tag manager giving out unique 3-bit tags
val tagMan = Module(new TagMan(log2Ceil(maxTags)))
// Default inputs
  tagMan.io.take := Bool(false)
  tagMan.io.put := Bool(false)
  tagMan.io.tagIn := UInt(0)
// Cycle counter
// -------------
// 32-bit cycle count used to record send-times of requests and
// receive-times of responses.
val cycleCount = Reg(init = UInt(0, 32))
  cycleCount := cycleCount + UInt(1)
// Delay timer
// -----------
// Used to implement the delay operation and to insert random
// delays between load-reserve and store-conditional commands.
// A 16-bit timer is plenty
val delayTimer = Module(new DynamicTimer(16))
// Used to generate a random delay period
val randDelayBase = LCG16()
// Random delay period: usually small, occasionally big
val randDelay = Frequency(List(
(14, UInt(0, 13) ## randDelayBase(2, 0)),
(2, UInt(0, 11) ## randDelayBase(5, 0))))
// Default inputs
delayTimer.io.start := Bool(false)
delayTimer.io.period := randDelay
delayTimer.io.stop := Bool(false)
// Operation dispatch
// ------------------
// Hardware thread id
val tid = UInt(id, numBitsInId)
// Request & response count
val reqCount = Reg(init = UInt(0, 32))
val respCount = Reg(init = UInt(0, 32))
// Current operation being executed
val currentOp = Reg(init = opNop)
// If larger than 0, a multi-cycle operation is in progress.
// Value indicates stage of progress.
val opInProgress = Reg(init = UInt(0, 2))
// Indicate when a fresh request is to be sent
val sendFreshReq = Wire(Bool())
sendFreshReq := Bool(false)
// Used to generate unique data values
val nextData = Reg(init = UInt(1, numBitsInWord-numBitsInId))
// Registers for all the interesting parts of a request
val reqValid = Reg(init = Bool(false))
val reqAddr = Reg(init = UInt(0, numBitsInWord))
val reqData = Reg(init = UInt(0, numBitsInWord))
val reqCmd = Reg(init = UInt(0, 5))
val reqTag = Reg(init = UInt(0, 7))
// Condition on being allowed to send a fresh request
val canSendFreshReq = (!reqValid || io.mem.req.fire()) &&
tagMan.io.available
// Operation dispatch
when (reqCount < UInt(numReqsPerGen)) {
// No-op
when (currentOp === opNop) {
// Move on to a new operation
currentOp := randOp
}
// Fence
when (currentOp === opFence) {
when (opInProgress === UInt(0) && !reqValid) {
// Emit fence request
printf("%d: fence-req @%d\n", tid, cycleCount)
// Multi-cycle operation now in progress
opInProgress := UInt(1)
}
// Wait until all requests have had a response
.elsewhen (reqCount === respCount) {
// Emit fence response
printf("%d: fence-resp @%d\n", tid, cycleCount)
// Move on to a new operation
currentOp := randOp
// Operation finished
opInProgress := UInt(0)
}
}
// Delay
when (currentOp === opDelay) {
when (opInProgress === UInt(0)) {
// Start timer
delayTimer.io.start := Bool(true)
// Multi-cycle operation now in progress
opInProgress := UInt(1)
}
.elsewhen (delayTimer.io.timeout) {
// Move on to a new operation
currentOp := randOp
// Operation finished
opInProgress := UInt(0)
}
}
// Load, store, or atomic swap
when (currentOp === opLoad ||
currentOp === opStore ||
currentOp === opSwap) {
when (canSendFreshReq) {
// Set address
reqAddr := randAddr
// Set command
when (currentOp === opLoad) {
reqCmd := M_XRD
} .elsewhen (currentOp === opStore) {
reqCmd := M_XWR
} .elsewhen (currentOp === opSwap) {
reqCmd := M_XA_SWAP
}
// Send request
sendFreshReq := Bool(true)
// Move on to a new operation
currentOp := randOp
}
}
// Load-reserve and store-conditional
// First issue an LR, then delay, then issue an SC
when (currentOp === opLRSC) {
// LR request has not yet been sent
when (opInProgress === UInt(0)) {
when (canSendFreshReq) {
// Set address and command
reqAddr := randAddr
reqCmd := M_XLR
// Send request
sendFreshReq := Bool(true)
// Multi-cycle operation now in progress
opInProgress := UInt(1)
}
}
// LR request has been sent, start delay timer
when (opInProgress === UInt(1)) {
// Start timer
delayTimer.io.start := Bool(true)
// Indicate that delay has started
opInProgress := UInt(2)
}
// Delay in progress
when (opInProgress === UInt(2)) {
when (delayTimer.io.timeout) {
// Delay finished
opInProgress := UInt(3)
}
}
// Delay finished, send SC request
when (opInProgress === UInt(3)) {
when (canSendFreshReq) {
// Set command, but leave address
// i.e. use same address as LR did
reqCmd := M_XSC
// Send request
sendFreshReq := Bool(true)
// Multi-cycle operation finished
opInProgress := UInt(0)
// Move on to a new operation
currentOp := randOp
}
}
}
}
// Sending of requests
// -------------------
when (sendFreshReq) {
// Grab a unique tag for the request
reqTag := tagMan.io.tagOut
tagMan.io.take := Bool(true)
// Fill in unique data
reqData := Cat(nextData, tid)
nextData := nextData + UInt(1)
// Request is good to go!
reqValid := Bool(true)
// Increment request count
reqCount := reqCount + UInt(1)
}
.elsewhen (io.mem.req.fire()) {
// Request has been sent and there is no new request ready
reqValid := Bool(false)
}
// Wire up interface to memory
io.mem.req.valid := reqValid
io.mem.req.bits.addr := reqAddr
io.mem.req.bits.data := reqData
io.mem.req.bits.typ := UInt(log2Ceil(numBytesInWord))
io.mem.req.bits.cmd := reqCmd
io.mem.req.bits.tag := reqTag
// On cycle when request is actually sent, print it
when (io.mem.req.fire()) {
// Short-hand for address
val addr = io.mem.req.bits.addr
// Print thread id
printf("%d:", tid)
// Print command
when (reqCmd === M_XRD) {
printf(" load-req 0x%x", addr)
}
when (reqCmd === M_XLR) {
printf(" load-reserve-req 0x%x", addr)
}
when (reqCmd === M_XWR) {
printf(" store-req %d 0x%x", reqData, addr)
}
when (reqCmd === M_XSC) {
printf(" store-cond-req %d 0x%x", reqData, addr)
}
when (reqCmd === M_XA_SWAP) {
printf(" swap-req %d 0x%x", reqData, addr)
}
// Print tag
printf(" #%d", reqTag)
// Print time
printf(" @%d\n", cycleCount)
}
// Handling of responses
// ---------------------
// When a response is received
when (io.mem.resp.valid) {
// Put tag back in tag set
tagMan.io.tagIn := io.mem.resp.bits.tag
tagMan.io.put := Bool(true)
// Print response
printf("%d: resp %d #%d @%d\n", tid,
io.mem.resp.bits.data, io.mem.resp.bits.tag, cycleCount)
// Increment response count
respCount := respCount + UInt(1)
}
// Termination condition
// ---------------------
val done = reqCount === UInt(numReqsPerGen) &&
respCount === UInt(numReqsPerGen)
val donePulse = done && !Reg(init = Bool(false), next = done)
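  // donePulse is a one-shot: high for a single cycle on the rising edge of done.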
// Emit that this thread has completed
when (donePulse) {
printf(s"FINISHED ${numGens}\n")
}
io.finished := Bool(false)
io.timeout := reqTimer.io.timeout.valid
}
class NoiseGenerator(implicit val p: Parameters) extends Module
with HasTraceGenParams
with HasTileLinkParameters {
val io = new Bundle {
val mem = new ClientUncachedTileLinkIO
val finished = Bool(INPUT)
}
val idBits = tlClientXactIdBits
val xact_id_free = Reg(UInt(width = idBits), init = ~UInt(0, idBits))
val xact_id_onehot = PriorityEncoderOH(xact_id_free)
val timer = Module(new DynamicTimer(8))
timer.io.start := io.mem.acquire.fire()
timer.io.period := LCG(8, io.mem.acquire.fire())
timer.io.stop := Bool(false)
val s_start :: s_send :: s_wait :: s_done :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_start)
when (state === s_start) { state := s_send }
when (io.mem.acquire.fire()) { state := s_wait }
when (state === s_wait) {
when (timer.io.timeout) { state := s_send }
when (io.finished) { state := s_done }
}
val acq_id = OHToUInt(xact_id_onehot)
val gnt_id = io.mem.grant.bits.client_xact_id
xact_id_free := (xact_id_free &
~Mux(io.mem.acquire.fire(), xact_id_onehot, UInt(0))) |
Mux(io.mem.grant.fire(), UIntToOH(gnt_id), UInt(0))
val tlBlockOffset = tlBeatAddrBits + tlByteAddrBits
val addr_idx = LCG(logAddressBagLen, io.mem.acquire.fire())
val addr_bag = Vec(addressBag.map(
addr => UInt(addr >> tlBlockOffset, tlBlockAddrBits)))
val addr_block = addr_bag(addr_idx)
val addr_beat = LCG(tlBeatAddrBits, io.mem.acquire.fire())
val acq_select = LCG(1, io.mem.acquire.fire())
val get_acquire = Get(
client_xact_id = acq_id,
addr_block = addr_block,
addr_beat = addr_beat)
val put_acquire = Put(
client_xact_id = acq_id,
addr_block = addr_block,
addr_beat = addr_beat,
data = UInt(0),
wmask = Some(UInt(0)))
io.mem.acquire.valid := (state === s_send) && xact_id_free.orR
io.mem.acquire.bits := Mux(acq_select(0), get_acquire, put_acquire)
io.mem.grant.ready := !xact_id_free(gnt_id)
}
// =======================
// Trace-generator wrapper
// =======================
class GroundTestTraceGenerator(implicit p: Parameters)
extends GroundTest()(p) with HasTraceGenParams {
require(io.mem.size <= 1)
require(io.cache.size == 1)
val traceGen = Module(new TraceGenerator(p(GroundTestId)))
io.cache.head <> traceGen.io.mem
if (io.mem.size == 1) {
val noiseGen = Module(new NoiseGenerator)
io.mem.head <> noiseGen.io.mem
noiseGen.io.finished := traceGen.io.finished
}
io.status.finished := traceGen.io.finished
io.status.timeout.valid := traceGen.io.timeout
io.status.timeout.bits := UInt(0)
io.status.error.valid := Bool(false)
}

View File

@ -0,0 +1,194 @@
package groundtest
import Chisel._
// =============
// Dynamic timer
// =============
// Timer with a dynamically-settable period.
class DynamicTimer(w: Int) extends Module {
val io = new Bundle {
val start = Bool(INPUT)
val period = UInt(INPUT, w)
val stop = Bool(INPUT)
val timeout = Bool(OUTPUT)
}
val countdown = Reg(init = UInt(0, w))
val active = Reg(init = Bool(false))
when (io.start) {
countdown := io.period
active := Bool(true)
} .elsewhen (io.stop || countdown === UInt(0)) {
active := Bool(false)
} .elsewhen (active) {
countdown := countdown - UInt(1)
}
io.timeout := countdown === UInt(0) && active
}
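// Illustrative usage sketch (not from the original file): restart the
// timer with a fresh pseudo-random period after every timeout, giving a
// randomly spaced pulse train. `PeriodicPulse` is a hypothetical name.
class PeriodicPulse extends Module {
  val io = new Bundle {
    val pulse = Bool(OUTPUT)
  }
  val timer = Module(new DynamicTimer(8))
  // Kick the timer on the first cycle, then again on each timeout
  val started = Reg(init = Bool(false))
  started := Bool(true)
  timer.io.start := !started || timer.io.timeout
  timer.io.period := LCG(8, timer.io.start)
  timer.io.stop := Bool(false)
  io.pulse := timer.io.timeout
}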
// ============
// LCG16 module
// ============
// A 16-bit pseudo-random generator based on a linear congruential
// generator (LCG). The state is stored in an uninitialised register.
// When using the C++ backend, it is straightforward to arrange a
// random initial value for each uninitialised register, effectively
// seeding each LCG16 instance with a different seed.
class LCG16 extends Module {
val io = new Bundle {
val out = UInt(OUTPUT, 16)
val inc = Bool(INPUT)
}
val state = Reg(UInt(width = 32))
when (io.inc) {
state := state * UInt(1103515245, 32) + UInt(12345, 32)
}
io.out := state(30, 15)
}
// ==========
// LCG module
// ==========
// An n-bit pseudo-random generator made from multiple instances of a
// 16-bit LCG. Parameter 'w' must be larger than 0.
class LCG(val w: Int) extends Module {
val io = new Bundle {
val out = UInt(OUTPUT, w)
val inc = Bool(INPUT)
}
require(w > 0)
val numLCG16s : Int = (w+15)/16
val outs = Seq.fill(numLCG16s) { LCG16(io.inc) }
io.out := Cat(outs)
}
object LCG16 {
def apply(inc: Bool = Bool(true)): UInt = {
val lcg = Module(new LCG16)
lcg.io.inc := inc
lcg.io.out
}
}
object LCG {
def apply(w: Int, inc: Bool = Bool(true)): UInt = {
val lcg = Module(new LCG(w))
lcg.io.inc := inc
lcg.io.out
}
}
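// Illustrative usage sketch: an 8-bit pseudo-random source that advances
// only when `io.advance` is high. `RandomByte` is a hypothetical name.
class RandomByte extends Module {
  val io = new Bundle {
    val advance = Bool(INPUT)
    val value = UInt(OUTPUT, 8)
  }
  io.value := LCG(8, io.advance)
}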
// ======================
// Frequency distribution
// ======================
// Given a list of (frequency, value) pairs, return a random value
// according to the frequency distribution. The sum of the
// frequencies in the distribution must be a power of two.
object Frequency {
def apply(dist : List[(Int, Bits)]) : Bits = {
// Distribution must be non-empty
require(dist.length > 0)
// Require that the frequencies sum to a power of two
val (freqs, vals) = dist.unzip
val total = freqs.sum
require(isPow2(total))
// First item in the distribution
val (firstFreq, firstVal) = dist.head
// Result wire
val result = Wire(Bits(width = firstVal.getWidth))
result := UInt(0)
// Random value
val randVal = LCG(log2Up(total))
// Pick return value
var count = firstFreq
var select = when (randVal < UInt(firstFreq)) { result := firstVal }
for (p <- dist.drop(1)) {
count = count + p._1
select = select.elsewhen(randVal < UInt(count)) { result := p._2 }
}
return result
}
}
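// Illustrative usage sketch: draw a 2-bit opcode with probabilities 4/8,
// 3/8 and 1/8; the frequencies sum to 8, a power of two, as required.
// `RandomOpcode` is a hypothetical name.
class RandomOpcode extends Module {
  val io = new Bundle {
    val opcode = Bits(OUTPUT, 2)
  }
  io.opcode := Frequency(List(
    (4, UInt("b00", 2)),
    (3, UInt("b01", 2)),
    (1, UInt("b10", 2))))
}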
object ValidMux {
def apply[T <: Data](v1: ValidIO[T], v2: ValidIO[T]*): ValidIO[T] = {
apply(v1 +: v2.toSeq)
}
def apply[T <: Data](valids: Seq[ValidIO[T]]): ValidIO[T] = {
val out = Wire(Valid(valids.head.bits))
out.valid := valids.map(_.valid).reduce(_ || _)
out.bits := MuxCase(valids.head.bits,
valids.map(v => (v.valid -> v.bits)))
out
}
}
object DebugCombiner {
def apply(debugs: Seq[GroundTestStatus]): GroundTestStatus = {
val out = Wire(new GroundTestStatus)
out.finished := debugs.map(_.finished).reduce(_ && _)
out.timeout := ValidMux(debugs.map(_.timeout))
out.error := ValidMux(debugs.map(_.error))
out
}
}
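// Illustrative usage sketch: merge the status bundles of two sub-tests,
// finishing only when both have finished and forwarding whichever timeout
// or error fires first. `MergeTwoStatuses` is a hypothetical name;
// GroundTestStatus is defined elsewhere in this package.
class MergeTwoStatuses extends Module {
  val io = new Bundle {
    val in = Vec(2, new GroundTestStatus).flip
    val out = new GroundTestStatus
  }
  io.out := DebugCombiner(io.in)
}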
/**
* Takes in data on one decoupled interface and broadcasts it to
* N decoupled output interfaces
*/
class Broadcaster[T <: Data](typ: T, n: Int) extends Module {
val io = new Bundle {
val in = Decoupled(typ).flip
val out = Vec(n, Decoupled(typ))
}
require (n > 0)
if (n == 1) {
io.out.head <> io.in
} else {
val idx = Reg(init = UInt(0, log2Up(n)))
val save = Reg(typ)
io.out.head.valid := idx === UInt(0) && io.in.valid
io.out.head.bits := io.in.bits
for (i <- 1 until n) {
io.out(i).valid := idx === UInt(i)
io.out(i).bits := save
}
io.in.ready := io.out.head.ready && idx === UInt(0)
when (io.in.fire()) { save := io.in.bits }
when (io.out(idx).fire()) {
when (idx === UInt(n - 1)) { idx := UInt(0) }
.otherwise { idx := idx + UInt(1) }
}
}
}
object Broadcaster {
def apply[T <: Data](in: DecoupledIO[T], n: Int): Vec[DecoupledIO[T]] = {
val split = Module(new Broadcaster(in.bits, n))
split.io.in <> in
split.io.out
}
}
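// Illustrative usage sketch: deliver every beat of one input stream to
// each of three consumers, one consumer per cycle, in index order.
// `FanOut3` is a hypothetical name.
class FanOut3 extends Module {
  val io = new Bundle {
    val in = Decoupled(UInt(width = 8)).flip
    val out = Vec(3, Decoupled(UInt(width = 8)))
  }
  io.out <> Broadcaster(io.in, 3)
}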

View File

@ -0,0 +1,148 @@
// See LICENSE for license details.
package junctions
import Chisel._
import cde.{Parameters, Field}
import scala.collection.mutable.HashMap
case object PAddrBits extends Field[Int]
case object GlobalAddrMap extends Field[AddrMap]
trait HasAddrMapParameters {
implicit val p: Parameters
val paddrBits = p(PAddrBits)
val addrMap = p(GlobalAddrMap)
}
case class MemAttr(prot: Int, cacheable: Boolean = false)
sealed abstract class MemRegion {
def start: BigInt
def size: BigInt
def numSlaves: Int
def attr: MemAttr
def containsAddress(x: UInt) = UInt(start) <= x && x < UInt(start + size)
}
case class MemSize(size: BigInt, attr: MemAttr) extends MemRegion {
def start = 0
def numSlaves = 1
}
case class MemRange(start: BigInt, size: BigInt, attr: MemAttr) extends MemRegion {
def numSlaves = 1
}
object AddrMapProt {
val R = 0x1
val W = 0x2
val X = 0x4
val RW = R | W
val RX = R | X
val RWX = R | W | X
val SZ = 3
}
class AddrMapProt extends Bundle {
val x = Bool()
val w = Bool()
val r = Bool()
}
case class AddrMapEntry(name: String, region: MemRegion)
object AddrMap {
def apply(elems: AddrMapEntry*): AddrMap = new AddrMap(elems)
}
class AddrMap(
entriesIn: Seq[AddrMapEntry],
val start: BigInt = BigInt(0),
val collapse: Boolean = false) extends MemRegion {
private val slavePorts = HashMap[String, Int]()
private val mapping = HashMap[String, MemRegion]()
def isEmpty = entries.isEmpty
def length = entries.size
def numSlaves = slavePorts.size
val (size: BigInt, entries: Seq[AddrMapEntry], attr: MemAttr) = {
var ind = 0
var base = start
var rebasedEntries = collection.mutable.ArrayBuffer[AddrMapEntry]()
var prot = 0
var cacheable = true
for (AddrMapEntry(name, r) <- entriesIn) {
if (r.start != 0) {
val align = BigInt(1) << log2Ceil(r.size)
require(r.start >= base, s"region $name base address 0x${r.start.toString(16)} overlaps previous base 0x${base.toString(16)}")
require(r.start % align == 0, s"region $name base address 0x${r.start.toString(16)} not aligned to 0x${align.toString(16)}")
base = r.start
} else {
base = (base + r.size - 1) / r.size * r.size
}
r match {
case r: AddrMap =>
val subMap = new AddrMap(r.entries, base, r.collapse)
rebasedEntries += AddrMapEntry(name, subMap)
mapping += name -> subMap
mapping ++= subMap.mapping.map { case (k, v) => s"$name:$k" -> v }
if (r.collapse) {
slavePorts += (name -> ind)
ind += 1
} else {
slavePorts ++= subMap.slavePorts.map {
case (k, v) => s"$name:$k" -> (ind + v)
}
ind += r.numSlaves
}
case _ =>
val e = MemRange(base, r.size, r.attr)
rebasedEntries += AddrMapEntry(name, e)
mapping += name -> e
slavePorts += name -> ind
ind += r.numSlaves
}
base += r.size
prot |= r.attr.prot
cacheable &&= r.attr.cacheable
}
(base - start, rebasedEntries, MemAttr(prot, cacheable))
}
val flatten: Seq[AddrMapEntry] = {
mapping.toSeq.map {
case (name, range: MemRange) => Some(AddrMapEntry(name, range))
case _ => None
}.flatten.sortBy(_.region.start)
}
def toRange: MemRange = MemRange(start, size, attr)
def apply(name: String): MemRegion = mapping(name)
def contains(name: String): Boolean = mapping.contains(name)
def port(name: String): Int = slavePorts(name)
def subMap(name: String): AddrMap = mapping(name).asInstanceOf[AddrMap]
def isInRegion(name: String, addr: UInt): Bool = mapping(name).containsAddress(addr)
def isCacheable(addr: UInt): Bool = {
flatten.filter(_.region.attr.cacheable).map(
_.region.containsAddress(addr)
).foldLeft(Bool(false))(_ || _)
}
def isValid(addr: UInt): Bool = {
flatten.map(_.region.containsAddress(addr)).foldLeft(Bool(false))(_ || _)
}
def getProt(addr: UInt): AddrMapProt = {
val protForRegion = flatten.map { entry =>
Mux(entry.region.containsAddress(addr),
UInt(entry.region.attr.prot, AddrMapProt.SZ), UInt(0))
}
new AddrMapProt().fromBits(protForRegion.reduce(_|_))
}
}
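// Illustrative sketch with hypothetical names and sizes: a 4KB read-write
// device region followed by a cacheable 1GB RWX memory region, both placed
// automatically from address 0 by the constructor above.
object ExampleAddrMap {
  def apply(): AddrMap = AddrMap(
    AddrMapEntry("dev", MemSize(0x1000, MemAttr(AddrMapProt.RW))),
    AddrMapEntry("mem", MemSize(0x40000000L, MemAttr(AddrMapProt.RWX, cacheable = true))))
  // ExampleAddrMap().port("mem") then gives the slave port index for "mem",
  // and isCacheable(addr) is true exactly within the "mem" region.
}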

View File

@ -0,0 +1,333 @@
package junctions
import Chisel._
import scala.math.max
import cde.{Parameters, Field}
trait HasAtosParameters extends HasNastiParameters {
// round up to a multiple of 32
def roundup(n: Int) = 32 * ((n - 1) / 32 + 1)
val atosUnionBits = max(
nastiXIdBits + nastiXDataBits + nastiWStrobeBits + 1,
nastiXIdBits + nastiXBurstBits +
nastiXSizeBits + nastiXLenBits + nastiXAddrBits)
val atosIdBits = nastiXIdBits
val atosTypBits = 2
val atosRespBits = nastiXRespBits
val atosDataBits = nastiXDataBits
val atosAddrOffset = atosIdBits
val atosLenOffset = atosIdBits + nastiXAddrBits
val atosSizeOffset = atosLenOffset + nastiXLenBits
val atosBurstOffset = atosSizeOffset + nastiXSizeBits
val atosDataOffset = atosIdBits
val atosStrobeOffset = nastiXDataBits + atosIdBits
val atosLastOffset = atosStrobeOffset + nastiWStrobeBits
val atosRequestBits = roundup(atosTypBits + atosUnionBits)
val atosResponseBits = roundup(atosTypBits + atosIdBits + atosRespBits + atosDataBits + 1)
val atosRequestBytes = atosRequestBits / 8
val atosResponseBytes = atosResponseBits / 8
val atosRequestWords = atosRequestBytes / 4
val atosResponseWords = atosResponseBytes / 4
}
abstract class AtosModule(implicit val p: Parameters)
extends Module with HasAtosParameters
abstract class AtosBundle(implicit val p: Parameters)
extends ParameterizedBundle()(p) with HasAtosParameters
object AtosRequest {
def arType = UInt("b00")
def awType = UInt("b01")
def wType = UInt("b10")
def apply(typ: UInt, union: UInt)(implicit p: Parameters): AtosRequest = {
val areq = Wire(new AtosRequest)
areq.typ := typ
areq.union := union
areq
}
def apply(ar: NastiReadAddressChannel)(implicit p: Parameters): AtosRequest =
apply(arType, Cat(ar.burst, ar.size, ar.len, ar.addr, ar.id))
def apply(aw: NastiWriteAddressChannel)(implicit p: Parameters): AtosRequest =
apply(awType, Cat(aw.burst, aw.size, aw.len, aw.addr, aw.id))
def apply(w: NastiWriteDataChannel)(implicit p: Parameters): AtosRequest =
apply(wType, Cat(w.last, w.strb, w.data, w.id))
}
class AtosRequest(implicit p: Parameters)
extends AtosBundle()(p) with Serializable {
val typ = UInt(width = atosTypBits)
val union = UInt(width = atosUnionBits)
def burst(dummy: Int = 0) =
union(atosUnionBits - 1, atosBurstOffset)
def size(dummy: Int = 0) =
union(atosBurstOffset - 1, atosSizeOffset)
def len(dummy: Int = 0) =
union(atosSizeOffset - 1, atosLenOffset)
def addr(dummy: Int = 0) =
union(atosLenOffset - 1, atosAddrOffset)
def id(dummy: Int = 0) =
union(atosIdBits - 1, 0)
def data(dummy: Int = 0) =
union(atosStrobeOffset - 1, atosDataOffset)
def strb(dummy: Int = 0) =
union(atosLastOffset - 1, atosStrobeOffset)
def last(dummy: Int = 0) =
union(atosLastOffset)
def has_addr(dummy: Int = 0) =
typ === AtosRequest.arType || typ === AtosRequest.awType
def has_data(dummy: Int = 0) =
typ === AtosRequest.wType
def is_last(dummy: Int = 0) =
typ === AtosRequest.arType || (typ === AtosRequest.wType && last())
def nbits: Int = atosRequestBits
def resp_len(dummy: Int = 0) =
MuxLookup(typ, UInt(0), Seq(
AtosRequest.arType -> (len() + UInt(1)),
AtosRequest.awType -> UInt(1)))
}
object AtosResponse {
def rType = UInt("b00")
def bType = UInt("b01")
def apply(typ: UInt, id: UInt, resp: UInt, data: UInt, last: Bool)
(implicit p: Parameters): AtosResponse = {
val aresp = Wire(new AtosResponse)
aresp.typ := typ
aresp.id := id
aresp.resp := resp
aresp.data := data
aresp.last := last
aresp
}
def apply(r: NastiReadDataChannel)(implicit p: Parameters): AtosResponse =
apply(rType, r.id, r.resp, r.data, r.last)
def apply(b: NastiWriteResponseChannel)(implicit p: Parameters): AtosResponse =
apply(bType, b.id, b.resp, UInt(0), Bool(false))
}
class AtosResponse(implicit p: Parameters)
extends AtosBundle()(p) with Serializable {
val typ = UInt(width = atosTypBits)
val id = UInt(width = atosIdBits)
val resp = UInt(width = atosRespBits)
val last = Bool()
val data = UInt(width = atosDataBits)
def has_data(dummy: Int = 0) = typ === AtosResponse.rType
def is_last(dummy: Int = 0) = !has_data() || last
def nbits: Int = atosResponseBits
}
class AtosIO(implicit p: Parameters) extends AtosBundle()(p) {
val req = Decoupled(new AtosRequest)
val resp = Decoupled(new AtosResponse).flip
}
class AtosRequestEncoder(implicit p: Parameters) extends AtosModule()(p) {
val io = new Bundle {
val ar = Decoupled(new NastiReadAddressChannel).flip
val aw = Decoupled(new NastiWriteAddressChannel).flip
val w = Decoupled(new NastiWriteDataChannel).flip
val req = Decoupled(new AtosRequest)
}
val writing = Reg(init = Bool(false))
io.ar.ready := !writing && io.req.ready
io.aw.ready := !writing && !io.ar.valid && io.req.ready
io.w.ready := writing && io.req.ready
io.req.valid := Mux(writing, io.w.valid, io.ar.valid || io.aw.valid)
io.req.bits := Mux(writing, AtosRequest(io.w.bits),
Mux(io.ar.valid, AtosRequest(io.ar.bits), AtosRequest(io.aw.bits)))
when (io.aw.fire()) { writing := Bool(true) }
when (io.w.fire() && io.w.bits.last) { writing := Bool(false) }
}
class AtosResponseDecoder(implicit p: Parameters) extends AtosModule()(p) {
val io = new Bundle {
val resp = Decoupled(new AtosResponse).flip
val b = Decoupled(new NastiWriteResponseChannel)
val r = Decoupled(new NastiReadDataChannel)
}
val is_b = io.resp.bits.typ === AtosResponse.bType
val is_r = io.resp.bits.typ === AtosResponse.rType
io.b.valid := io.resp.valid && is_b
io.b.bits := NastiWriteResponseChannel(
id = io.resp.bits.id,
resp = io.resp.bits.resp)
io.r.valid := io.resp.valid && is_r
io.r.bits := NastiReadDataChannel(
id = io.resp.bits.id,
data = io.resp.bits.data,
last = io.resp.bits.last,
resp = io.resp.bits.resp)
io.resp.ready := (is_b && io.b.ready) || (is_r && io.r.ready)
}
class AtosClientConverter(implicit p: Parameters) extends AtosModule()(p) {
val io = new Bundle {
val nasti = (new NastiIO).flip
val atos = new AtosIO
}
val req_enc = Module(new AtosRequestEncoder)
req_enc.io.ar <> io.nasti.ar
req_enc.io.aw <> io.nasti.aw
req_enc.io.w <> io.nasti.w
io.atos.req <> req_enc.io.req
val resp_dec = Module(new AtosResponseDecoder)
resp_dec.io.resp <> io.atos.resp
io.nasti.b <> resp_dec.io.b
io.nasti.r <> resp_dec.io.r
}
class AtosRequestDecoder(implicit p: Parameters) extends AtosModule()(p) {
val io = new Bundle {
val req = Decoupled(new AtosRequest).flip
val ar = Decoupled(new NastiReadAddressChannel)
val aw = Decoupled(new NastiWriteAddressChannel)
val w = Decoupled(new NastiWriteDataChannel)
}
val is_ar = io.req.bits.typ === AtosRequest.arType
val is_aw = io.req.bits.typ === AtosRequest.awType
val is_w = io.req.bits.typ === AtosRequest.wType
io.ar.valid := io.req.valid && is_ar
io.ar.bits := NastiReadAddressChannel(
id = io.req.bits.id(),
addr = io.req.bits.addr(),
size = io.req.bits.size(),
len = io.req.bits.len(),
burst = io.req.bits.burst())
io.aw.valid := io.req.valid && is_aw
io.aw.bits := NastiWriteAddressChannel(
id = io.req.bits.id(),
addr = io.req.bits.addr(),
size = io.req.bits.size(),
len = io.req.bits.len(),
burst = io.req.bits.burst())
io.w.valid := io.req.valid && is_w
io.w.bits := NastiWriteDataChannel(
id = io.req.bits.id(),
data = io.req.bits.data(),
strb = Some(io.req.bits.strb()),
last = io.req.bits.last())
io.req.ready := (io.ar.ready && is_ar) ||
(io.aw.ready && is_aw) ||
(io.w.ready && is_w)
}
class AtosResponseEncoder(implicit p: Parameters) extends AtosModule()(p) {
val io = new Bundle {
val b = Decoupled(new NastiWriteResponseChannel).flip
val r = Decoupled(new NastiReadDataChannel).flip
val resp = Decoupled(new AtosResponse)
}
val locked = Reg(init = Bool(false))
io.resp.valid := (io.b.valid && !locked) || io.r.valid
io.resp.bits := Mux(io.r.valid,
AtosResponse(io.r.bits), AtosResponse(io.b.bits))
io.b.ready := !locked && !io.r.valid && io.resp.ready
io.r.ready := io.resp.ready
when (io.r.fire() && !io.r.bits.last) { locked := Bool(true) }
when (io.r.fire() && io.r.bits.last) { locked := Bool(false) }
}
class AtosManagerConverter(implicit p: Parameters) extends AtosModule()(p) {
val io = new Bundle {
val atos = (new AtosIO).flip
val nasti = new NastiIO
}
val req_dec = Module(new AtosRequestDecoder)
val resp_enc = Module(new AtosResponseEncoder)
req_dec.io.req <> io.atos.req
io.atos.resp <> resp_enc.io.resp
io.nasti.ar <> req_dec.io.ar
io.nasti.aw <> req_dec.io.aw
io.nasti.w <> req_dec.io.w
resp_enc.io.b <> io.nasti.b
resp_enc.io.r <> io.nasti.r
}
class AtosSerializedIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) {
val req = Decoupled(Bits(width = w))
val resp = Decoupled(Bits(width = w)).flip
val clk = Bool(OUTPUT)
val clk_edge = Bool(OUTPUT)
override def cloneType = new AtosSerializedIO(w)(p).asInstanceOf[this.type]
}
class AtosSerdes(w: Int)(implicit p: Parameters) extends AtosModule()(p) {
val io = new Bundle {
val wide = (new AtosIO).flip
val narrow = new AtosSerializedIO(w)
}
val ser = Module(new Serializer(w, new AtosRequest))
ser.io.in <> io.wide.req
io.narrow.req <> ser.io.out
val des = Module(new Deserializer(w, new AtosResponse))
des.io.in <> io.narrow.resp
io.wide.resp <> des.io.out
}
class AtosDesser(w: Int)(implicit p: Parameters) extends AtosModule()(p) {
val io = new Bundle {
val narrow = new AtosSerializedIO(w).flip
val wide = new AtosIO
}
val des = Module(new Deserializer(w, new AtosRequest))
des.io.in <> io.narrow.req
io.wide.req <> des.io.out
val ser = Module(new Serializer(w, new AtosResponse))
ser.io.in <> io.wide.resp
io.narrow.resp <> ser.io.out
}
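// Illustrative sketch: carry NASTI over an 8-bit link by pairing a client
// converter and serdes on one side with a desser and manager converter on
// the other. Clock-domain crossing is omitted; `AtosOverSerial` is a
// hypothetical name.
class AtosOverSerial(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val nasti_in = (new NastiIO).flip
    val nasti_out = new NastiIO
  }
  val client = Module(new AtosClientConverter)
  val serdes = Module(new AtosSerdes(8))
  val desser = Module(new AtosDesser(8))
  val manager = Module(new AtosManagerConverter)
  client.io.nasti <> io.nasti_in
  serdes.io.wide <> client.io.atos
  desser.io.narrow <> serdes.io.narrow
  manager.io.atos <> desser.io.wide
  io.nasti_out <> manager.io.nasti
}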

View File

@ -0,0 +1,150 @@
package junctions
import Chisel._
class Crossing[T <: Data](gen: T, enq_sync: Boolean, deq_sync: Boolean) extends Bundle {
val enq = Decoupled(gen).flip()
val deq = Decoupled(gen)
val enq_clock = if (enq_sync) Some(Clock(INPUT)) else None
val deq_clock = if (deq_sync) Some(Clock(INPUT)) else None
val enq_reset = if (enq_sync) Some(Bool(INPUT)) else None
val deq_reset = if (deq_sync) Some(Bool(INPUT)) else None
}
// Output is 1 for one cycle after any edge of 'in'
object AsyncHandshakePulse {
def apply(in: Bool, sync: Int): Bool = {
val syncv = RegInit(Vec.fill(sync+1){Bool(false)})
syncv.last := in
(syncv.init zip syncv.tail).foreach { case (sink, source) => sink := source }
syncv(0) =/= syncv(1)
}
}
class AsyncHandshakeSource[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool)
extends Module(_clock = clock, _reset = reset) {
val io = new Bundle {
// These come from the source clock domain
val enq = Decoupled(gen).flip()
// These cross to the sink clock domain
val bits = gen.cloneType.asOutput
val push = Bool(OUTPUT)
val pop = Bool(INPUT)
}
val ready = RegInit(Bool(true))
val bits = Reg(gen)
val push = RegInit(Bool(false))
io.enq.ready := ready
io.bits := bits
io.push := push
val pop = AsyncHandshakePulse(io.pop, sync)
assert (!pop || !ready)
when (pop) {
ready := Bool(true)
}
when (io.enq.fire()) {
ready := Bool(false)
bits := io.enq.bits
push := !push
}
}
class AsyncHandshakeSink[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool)
extends Module(_clock = clock, _reset = reset) {
val io = new Bundle {
// These cross to the source clock domain
val bits = gen.cloneType.asInput
val push = Bool(INPUT)
val pop = Bool(OUTPUT)
// These go to the sink clock domain
val deq = Decoupled(gen)
}
val valid = RegInit(Bool(false))
val bits = Reg(gen)
val pop = RegInit(Bool(false))
io.deq.valid := valid
io.deq.bits := bits
io.pop := pop
val push = AsyncHandshakePulse(io.push, sync)
assert (!push || !valid)
when (push) {
valid := Bool(true)
bits := io.bits
}
when (io.deq.fire()) {
valid := Bool(false)
pop := !pop
}
}
class AsyncHandshake[T <: Data](gen: T, sync: Int = 2) extends Module {
val io = new Crossing(gen, true, true)
require (sync >= 2)
val source = Module(new AsyncHandshakeSource(gen, sync, io.enq_clock.get, io.enq_reset.get))
val sink = Module(new AsyncHandshakeSink (gen, sync, io.deq_clock.get, io.deq_reset.get))
source.io.enq <> io.enq
io.deq <> sink.io.deq
sink.io.bits := source.io.bits
sink.io.push := source.io.push
source.io.pop := sink.io.pop
}
class AsyncDecoupledTo[T <: Data](gen: T, depth: Int = 0, sync: Int = 2) extends Module {
val io = new Crossing(gen, false, true)
// !!! if depth == 0 { use Handshake } else { use AsyncFIFO }
val crossing = Module(new AsyncHandshake(gen, sync)).io
crossing.enq_clock.get := clock
crossing.enq_reset.get := reset
crossing.enq <> io.enq
crossing.deq_clock.get := io.deq_clock.get
crossing.deq_reset.get := io.deq_reset.get
io.deq <> crossing.deq
}
object AsyncDecoupledTo {
// source is in our clock domain, output is in the 'to' clock domain
def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: DecoupledIO[T], depth: Int = 0, sync: Int = 2): DecoupledIO[T] = {
val to = Module(new AsyncDecoupledTo(source.bits, depth, sync))
to.io.deq_clock.get := to_clock
to.io.deq_reset.get := to_reset
to.io.enq <> source
to.io.deq
}
}
class AsyncDecoupledFrom[T <: Data](gen: T, depth: Int = 0, sync: Int = 2) extends Module {
val io = new Crossing(gen, true, false)
// !!! if depth == 0 { use Handshake } else { use AsyncFIFO }
val crossing = Module(new AsyncHandshake(gen, sync)).io
crossing.enq_clock.get := io.enq_clock.get
crossing.enq_reset.get := io.enq_reset.get
crossing.enq <> io.enq
crossing.deq_clock.get := clock
crossing.deq_reset.get := reset
io.deq <> crossing.deq
}
object AsyncDecoupledFrom {
// source is in the 'from' clock domain, output is in our clock domain
def apply[T <: Data](from_clock: Clock, from_reset: Bool, source: DecoupledIO[T], depth: Int = 0, sync: Int = 2): DecoupledIO[T] = {
val from = Module(new AsyncDecoupledFrom(source.bits, depth, sync))
from.io.enq_clock.get := from_clock
from.io.enq_reset.get := from_reset
from.io.enq <> source
from.io.deq
}
}
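// Illustrative usage sketch: move a stream produced in this module's clock
// domain into a separately clocked consumer domain. `ToOtherDomain` is a
// hypothetical name.
class ToOtherDomain[T <: Data](gen: T) extends Module {
  val io = new Bundle {
    val enq = Decoupled(gen).flip
    val deq_clock = Clock(INPUT)
    val deq_reset = Bool(INPUT)
    val deq = Decoupled(gen)
  }
  io.deq <> AsyncDecoupledTo(io.deq_clock, io.deq_reset, io.enq)
}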

View File

@ -0,0 +1,549 @@
package junctions
import Chisel._
import cde.{Parameters, Field}
object HastiConstants
{
// Values for htrans
val SZ_HTRANS = 2
val HTRANS_IDLE = UInt(0, SZ_HTRANS) // No transfer requested, not in a burst
val HTRANS_BUSY = UInt(1, SZ_HTRANS) // No transfer requested, in a burst
val HTRANS_NONSEQ = UInt(2, SZ_HTRANS) // First (potentially only) request in a burst
val HTRANS_SEQ = UInt(3, SZ_HTRANS) // Following requests in a burst
// Values for hburst
val SZ_HBURST = 3
val HBURST_SINGLE = UInt(0, SZ_HBURST) // Single access (no burst)
val HBURST_INCR = UInt(1, SZ_HBURST) // Incrementing burst of arbitrary length, not crossing 1KB
val HBURST_WRAP4 = UInt(2, SZ_HBURST) // 4-beat wrapping burst
val HBURST_INCR4 = UInt(3, SZ_HBURST) // 4-beat incrementing burst
val HBURST_WRAP8 = UInt(4, SZ_HBURST) // 8-beat wrapping burst
val HBURST_INCR8 = UInt(5, SZ_HBURST) // 8-beat incrementing burst
val HBURST_WRAP16 = UInt(6, SZ_HBURST) // 16-beat wrapping burst
val HBURST_INCR16 = UInt(7, SZ_HBURST) // 16-beat incrementing burst
// Values for hresp
val SZ_HRESP = 1
val HRESP_OKAY = UInt(0, SZ_HRESP)
val HRESP_ERROR = UInt(1, SZ_HRESP)
// Values for hsize are identical to TileLink MT_SZ
// i.e. transfers of 8*2^hsize bits
val SZ_HSIZE = 3
// Values for hprot (a bitmask)
val SZ_HPROT = 4
def HPROT_DATA = UInt("b0001") // Data access or Opcode fetch
def HPROT_PRIVILEGED = UInt("b0010") // Privileged or User access
def HPROT_BUFFERABLE = UInt("b0100") // Bufferable or non-bufferable
def HPROT_CACHEABLE = UInt("b1000") // Cacheable or non-cacheable
def dgate(valid: Bool, b: UInt) = Fill(b.getWidth, valid) & b
}
import HastiConstants._
case class HastiParameters(dataBits: Int, addrBits: Int)
case object HastiId extends Field[String]
case class HastiKey(id: String) extends Field[HastiParameters]
trait HasHastiParameters {
implicit val p: Parameters
val hastiParams = p(HastiKey(p(HastiId)))
val hastiAddrBits = hastiParams.addrBits
val hastiDataBits = hastiParams.dataBits
val hastiDataBytes = hastiDataBits/8
val hastiAlignment = log2Ceil(hastiDataBytes)
}
abstract class HastiModule(implicit val p: Parameters) extends Module
with HasHastiParameters
abstract class HastiBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasHastiParameters
class HastiMasterIO(implicit p: Parameters) extends HastiBundle()(p) {
val htrans = UInt(OUTPUT, SZ_HTRANS)
val hmastlock = Bool(OUTPUT)
val haddr = UInt(OUTPUT, hastiAddrBits)
val hwrite = Bool(OUTPUT)
val hburst = UInt(OUTPUT, SZ_HBURST)
val hsize = UInt(OUTPUT, SZ_HSIZE)
val hprot = UInt(OUTPUT, SZ_HPROT)
val hwdata = Bits(OUTPUT, hastiDataBits)
val hrdata = Bits(INPUT, hastiDataBits)
val hready = Bool(INPUT)
val hresp = UInt(INPUT, SZ_HRESP)
def isNSeq(dummy:Int=0) = htrans === HTRANS_NONSEQ // SEQ does not start a NEW request
def isHold(dummy:Int=0) = htrans === HTRANS_BUSY || htrans === HTRANS_SEQ
def isIdle(dummy:Int=0) = htrans === HTRANS_IDLE || htrans === HTRANS_BUSY
}
class HastiSlaveIO(implicit p: Parameters) extends HastiBundle()(p) {
val htrans = UInt(INPUT, SZ_HTRANS)
val hmastlock = Bool(INPUT)
val haddr = UInt(INPUT, hastiAddrBits)
val hwrite = Bool(INPUT)
val hburst = UInt(INPUT, SZ_HBURST)
val hsize = UInt(INPUT, SZ_HSIZE)
val hprot = UInt(INPUT, SZ_HPROT)
val hwdata = Bits(INPUT, hastiDataBits)
val hrdata = Bits(OUTPUT, hastiDataBits)
val hsel = Bool(INPUT)
val hready = Bool(OUTPUT)
val hresp = UInt(OUTPUT, SZ_HRESP)
}
/* A diverted master is told hready when his address phase goes nowhere.
* In this case, we buffer his address phase request and replay it later.
* NOTE: this must optimize to nothing when divert is constantly false.
*/
class MasterDiversion(implicit p: Parameters) extends HastiModule()(p) {
val io = new Bundle {
val in = (new HastiMasterIO).flip
val out = (new HastiMasterIO)
val divert = Bool(INPUT)
}
val full = Reg(init = Bool(false))
val buffer = Reg(new HastiMasterIO)
when (io.out.hready) {
full := Bool(false)
}
when (io.divert) {
full := Bool(true)
buffer := io.in
}
// If the master is diverted, he must also have been told hready
assert (!io.divert || io.in.hready,
"Diverted but not ready");
// Replay the request we diverted
io.out.htrans := Mux(full, buffer.htrans, io.in.htrans)
io.out.hmastlock := Mux(full, buffer.hmastlock, io.in.hmastlock)
io.out.haddr := Mux(full, buffer.haddr, io.in.haddr)
io.out.hwrite := Mux(full, buffer.hwrite, io.in.hwrite)
io.out.hburst := Mux(full, buffer.hburst, io.in.hburst)
io.out.hsize := Mux(full, buffer.hsize, io.in.hsize)
io.out.hprot := Mux(full, buffer.hprot, io.in.hprot)
io.out.hwdata := Mux(full, buffer.hwdata, io.in.hwdata)
// Pass slave response back
io.in.hrdata := io.out.hrdata
io.in.hresp := io.out.hresp
io.in.hready := io.out.hready && !full // Block master while we steal his address phase
}
/* Masters with lower index have priority over higher index masters.
* However, a lower priority master will retain control of a slave when EITHER:
* 1. a burst is in progress (switching slaves mid-burst violates AHB-lite at slave)
* 2. a transfer was waited (the standard forbids changing requests in this case)
*
* If a master raises hmastlock, it will be waited until no other master has inflight
* requests; then, it acquires exclusive control of the crossbar until hmastlock is low.
*
* To implement an AHB-lite crossbar, it is important to realize that requests and
* responses are coupled. Unlike modern bus protocols where the response data has flow
* control independent of the request data, in AHB-lite, both flow at the same time at
* the sole discretion of the slave via the hready signal. The address and data are
* delivered on two back-to-back cycles, the so-called address and data phases.
*
* Masters can only be connected to a single slave at a time. If a master had two different
* slave connections on the address and data phases, there would be two independent hready
* signals. An AHB-lite slave can assume that data flows when it asserts hready. If the data
* slave deasserts hready while the address slave asserts hready, the master is put in the
* impossible position of being in data phase on two slaves at once. For this reason, when
* a master issues back-to-back accesses to distinct slaves, we inject a pipeline bubble
* between the two requests to limit the master to just a single slave at a time.
*
* Conversely, a slave CAN have two masters attached to it. This is unproblematic, because
* the only signal which governs data flow is hready. Thus, both masters can be stalled
* safely by the single slave.
*/
class HastiXbar(nMasters: Int, addressMap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p) {
val io = new Bundle {
val masters = Vec(nMasters, new HastiMasterIO).flip
val slaves = Vec(addressMap.size, new HastiSlaveIO).flip
}
val nSlaves = addressMap.size
// Set up diversions in front of each master
val diversions = Seq.tabulate(nMasters) { m => Module(new MasterDiversion) }
(io.masters zip diversions) foreach { case (m, d) => d.io.in <> m }
// Handy short-hand
val masters = diversions map (_.io.out)
val slaves = io.slaves
// Lock status of the crossbar
val lockedM = Reg(init = Vec.fill(nMasters)(Bool(false)))
val isLocked = lockedM.reduce(_ || _)
// This matrix governs the master-slave connections in the address phase
// It is indexed by addressPhaseGrantSM(slave)(master)
// It is guaranteed to have at most one 'true' per column and per row
val addressPhaseGrantSM = Wire(Vec(nSlaves, Vec(nMasters, Bool())))
// This matrix governs the master-slave connections in the data phase
// It is guaranteed to have at most one 'true' per column and per row
val dataPhaseGrantSM = Reg (init = Vec.fill(nSlaves)(Vec.fill(nMasters)(Bool(false))))
// This matrix is the union of the address and data phases.
// It is transposed with respect to the two previous matrices.
// It is guaranteed to contain at most one 'true' per master row.
// However, two 'true's per slave column are permitted.
val unionGrantMS = Vec.tabulate(nMasters) { m => Vec.tabulate(nSlaves) { s =>
addressPhaseGrantSM(s)(m) || dataPhaseGrantSM(s)(m) } }
// Confirm the guarantees made above
def justOnce(v: Vec[Bool]) = v.fold(Bool(false)) { case (p, v) =>
assert (!p || !v)
p || v
}
addressPhaseGrantSM foreach { s => justOnce(s) }
unionGrantMS foreach { s => justOnce(s) }
// Data phase follows address phase whenever the slave is ready
(slaves zip (dataPhaseGrantSM zip addressPhaseGrantSM)) foreach { case (s, (d, a)) =>
when (s.hready) { d := a }
}
// Record the grant state from the previous cycle; needed in case we hold access
val priorAddressPhaseGrantSM = RegNext(addressPhaseGrantSM)
// If a master says BUSY or SEQ, it is in the middle of a burst.
// In this case, it MUST stay attached to the same slave as before.
// Otherwise, it would violate the AHB-lite specification as seen by
// the slave, which is guaranteed a complete burst of the promised length.
// One case where this matters is preventing preemption of low-prio masters.
// NOTE: this exposes a slave to bad addresses when a master is buggy
val holdBurstM = Vec(masters map { _.isHold() })
// Transform the burst hold requirement from master indexing to slave indexing
// We use the previous cycle's binding because the master continues the prior burst
val holdBurstS = Vec(priorAddressPhaseGrantSM map { m => Mux1H(m, holdBurstM) })
// If a slave says !hready to a request, it must retain the same master next cycle.
// The AHB-lite specification requires that a waited transfer remain unchanged.
// If we preempted a waited master, the new master's request could potentially differ.
val holdBusyS = RegNext(Vec(slaves map { s => !s.hready && s.hsel }))
// Combine the above two grounds to determine if the slave retains its prior master
val holdS = Vec((holdBurstS zip holdBusyS) map ({ case (a,b) => a||b }))
// Determine which master addresses match which slaves
val matchMS = Vec(masters map { m => Vec(addressMap map { afn => afn(m.haddr) }) })
// Detect requests to nowhere; we need to allow progress in this case
val nowhereM = Vec(matchMS map { s => !s.reduce(_ || _) })
// Detect if we need to inject a pipeline bubble between the master requests.
// Divert masters already granted a data phase different from next request.
// NOTE: if only one slave, matchMS is always true => bubble always false
// => the diversion registers are optimized away as they are unread
// NOTE: bubble => dataPhase => have an hready signal
val bubbleM =
Vec.tabulate(nMasters) { m =>
Vec.tabulate(nSlaves) { s => dataPhaseGrantSM(s)(m) && !matchMS(m)(s) }
.reduce(_ || _) }
// Block any request that requires bus ownership or conflicts with isLocked
val blockedM =
Vec((lockedM zip masters) map { case(l, m) => !l && (isLocked || m.hmastlock) })
// Requested access to slaves from masters (pre-arbitration)
// NOTE: isNSeq does NOT include SEQ; thus, masters who are midburst do not
// request access to a new slave. They stay tied to the old and do not get two.
// NOTE: if a master was waited, it must repeat the same request as last cycle;
// thus, it will request the same slave and not end up with two (unless buggy).
val NSeq = masters.map(_.isNSeq())
val requestSM = Vec.tabulate(nSlaves) { s => Vec.tabulate(nMasters) { m =>
matchMS(m)(s) && NSeq(m) && !bubbleM(m) && !blockedM(m) } }
// Select at most one master request per slave (lowest index = highest priority)
val selectedRequestSM = Vec(requestSM map { m => Vec(PriorityEncoderOH(m)) })
// Calculate new crossbar interconnect state
addressPhaseGrantSM := Vec((holdS zip (priorAddressPhaseGrantSM zip selectedRequestSM))
map { case (h, (p, r)) => Mux(h, p, r) })
for (m <- 0 until nMasters) {
// If the master is connected to a slave, the slave determines hready.
// However, if no slave is connected, report ready anyway (to allow progress) if:
// bad address (swallow request) OR idle (permit stupid masters to move FSM)
val autoready = nowhereM(m) || masters(m).isIdle()
val hready = Mux1H(unionGrantMS(m), slaves.map(_.hready ^ autoready)) ^ autoready
masters(m).hready := hready
// If we diverted a master, we need to absorb his address phase to replay later
diversions(m).io.divert := (bubbleM(m) || blockedM(m)) && NSeq(m) && hready
}
// Master muxes (address and data phase are the same)
(masters zip unionGrantMS) foreach { case (m, g) => {
m.hrdata := Mux1H(g, slaves.map(_.hrdata))
m.hresp := Mux1H(g, slaves.map(_.hresp))
} }
// Slave address phase muxes
(slaves zip addressPhaseGrantSM) foreach { case (s, g) => {
s.htrans := Mux1H(g, masters.map(_.htrans))
s.haddr := Mux1H(g, masters.map(_.haddr))
s.hmastlock := isLocked
s.hwrite := Mux1H(g, masters.map(_.hwrite))
s.hsize := Mux1H(g, masters.map(_.hsize))
s.hburst := Mux1H(g, masters.map(_.hburst))
s.hprot := Mux1H(g, masters.map(_.hprot))
s.hsel := g.reduce(_ || _)
} }
// Slave data phase muxes
(slaves zip dataPhaseGrantSM) foreach { case (s, g) => {
s.hwdata := Mux1H(g, masters.map(_.hwdata))
} }
// When no master-slave connections are active, a master can take-over the bus
val canLock = !addressPhaseGrantSM.map({ v => v.reduce(_ || _) }).reduce(_ || _)
// Lowest index highest priority for lock arbitration
val reqLock = masters.map(_.hmastlock)
val winLock = PriorityEncoderOH(reqLock)
// Lock arbitration
when (isLocked) {
lockedM := (lockedM zip reqLock) map { case (a,b) => a && b }
} .elsewhen (canLock) {
lockedM := winLock
}
}
class HastiBus(amap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p) {
val io = new Bundle {
val master = new HastiMasterIO().flip
val slaves = Vec(amap.size, new HastiSlaveIO).flip
}
val bar = Module(new HastiXbar(1, amap))
bar.io.masters(0) <> io.master
bar.io.slaves <> io.slaves
}
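// Illustrative usage sketch (hypothetical address split): one master and
// two slaves, with the low 64KB routed to slave 0 and everything else to
// slave 1. Assumes the usual HastiKey/HastiId entries are present in p.
class TwoSlaveBus(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val master = new HastiMasterIO().flip
    val slaves = Vec(2, new HastiSlaveIO).flip
  }
  val amap = Seq(
    (addr: UInt) => addr < UInt(0x10000),
    (addr: UInt) => addr >= UInt(0x10000))
  val bus = Module(new HastiBus(amap))
  bus.io.master <> io.master
  bus.io.slaves <> io.slaves
}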
class HastiSlaveMux(n: Int)(implicit p: Parameters) extends HastiModule()(p) {
val io = new Bundle {
val ins = Vec(n, new HastiSlaveIO)
val out = new HastiSlaveIO().flip
}
val amap = Seq({ (_:UInt) => Bool(true)})
val bar = Module(new HastiXbar(n, amap))
io.ins <> bar.io.masters
io.out <> bar.io.slaves(0)
}
class HastiSlaveToMaster(implicit p: Parameters) extends HastiModule()(p) {
val io = new Bundle {
val in = new HastiSlaveIO
val out = new HastiMasterIO
}
io.out.htrans := Mux(io.in.hsel, io.in.htrans, HTRANS_IDLE)
io.out.hmastlock := io.in.hmastlock
io.out.haddr := io.in.haddr
io.out.hwrite := io.in.hwrite
io.out.hburst := io.in.hburst
io.out.hsize := io.in.hsize
io.out.hprot := io.in.hprot
io.out.hwdata := io.in.hwdata
io.in.hrdata := io.out.hrdata
io.in.hready := io.out.hready
io.in.hresp := io.out.hresp
}
class HastiMasterIONastiIOConverter(implicit p: Parameters) extends HastiModule()(p)
with HasNastiParameters {
val io = new Bundle {
val nasti = new NastiIO().flip
val hasti = new HastiMasterIO
}
require(hastiAddrBits == nastiXAddrBits)
require(hastiDataBits == nastiXDataBits)
val r_queue = Module(new Queue(new NastiReadDataChannel, 2, pipe = true))
val s_idle :: s_read :: s_write :: s_write_resp :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_idle)
val addr = Reg(UInt(width = hastiAddrBits))
val id = Reg(UInt(width = nastiXIdBits))
val size = Reg(UInt(width = nastiXSizeBits))
val len = Reg(UInt(width = nastiXLenBits))
val data = Reg(UInt(width = nastiXDataBits))
val first = Reg(init = Bool(false))
val is_rtrans = (state === s_read) &&
(io.hasti.htrans === HTRANS_SEQ ||
io.hasti.htrans === HTRANS_NONSEQ)
val rvalid = RegEnable(is_rtrans, Bool(false), io.hasti.hready)
io.nasti.aw.ready := (state === s_idle)
io.nasti.ar.ready := (state === s_idle) && !io.nasti.aw.valid
io.nasti.w.ready := (state === s_write) && io.hasti.hready
io.nasti.b.valid := (state === s_write_resp)
io.nasti.b.bits := NastiWriteResponseChannel(id = id)
io.nasti.r <> r_queue.io.deq
r_queue.io.enq.valid := io.hasti.hready && rvalid
r_queue.io.enq.bits := NastiReadDataChannel(
id = id,
data = io.hasti.hrdata,
last = (len === UInt(0)))
assert(!r_queue.io.enq.valid || r_queue.io.enq.ready,
"NASTI -> HASTI converter queue overflow")
// How many read requests have we not delivered a response for yet?
val pending_count = r_queue.io.count + rvalid
io.hasti.haddr := addr
io.hasti.hsize := size
io.hasti.hwrite := (state === s_write)
io.hasti.hburst := HBURST_INCR
io.hasti.hprot := UInt(0)
io.hasti.hwdata := data
io.hasti.hmastlock := Bool(false)
io.hasti.htrans := MuxLookup(state, HTRANS_IDLE, Seq(
s_write -> Mux(io.nasti.w.valid,
Mux(first, HTRANS_NONSEQ, HTRANS_SEQ),
Mux(first, HTRANS_IDLE, HTRANS_BUSY)),
s_read -> MuxCase(HTRANS_BUSY, Seq(
first -> HTRANS_NONSEQ,
(pending_count <= UInt(1)) -> HTRANS_SEQ))))
when (io.nasti.aw.fire()) {
first := Bool(true)
addr := io.nasti.aw.bits.addr
id := io.nasti.aw.bits.id
size := io.nasti.aw.bits.size
state := s_write
}
when (io.nasti.ar.fire()) {
first := Bool(true)
addr := io.nasti.ar.bits.addr
id := io.nasti.ar.bits.id
size := io.nasti.ar.bits.size
len := io.nasti.ar.bits.len
state := s_read
}
when (io.nasti.w.fire()) {
first := Bool(false)
addr := addr + (UInt(1) << size)
data := io.nasti.w.bits.data
when (io.nasti.w.bits.last) { state := s_write_resp }
}
when (io.nasti.b.fire()) { state := s_idle }
when (is_rtrans && io.hasti.hready) {
first := Bool(false)
addr := addr + (UInt(1) << size)
len := len - UInt(1)
when (len === UInt(0)) { state := s_idle }
}
}
class HastiTestSRAM(depth: Int)(implicit p: Parameters) extends HastiModule()(p) {
val io = new HastiSlaveIO
// This is a test SRAM with random delays
val ready = LFSR16(Bool(true))(0) // replace with Bool(true) to disable the random delays
// Calculate the bitmask of which bytes are being accessed
val mask_decode = Vec.tabulate(hastiAlignment+1) (UInt(_) <= io.hsize)
val mask_wide = Vec.tabulate(hastiDataBytes) { i => mask_decode(log2Up(i+1)) }
val mask_shift = if (hastiAlignment == 0) UInt(1) else
mask_wide.asUInt() << io.haddr(hastiAlignment-1,0)
// The request had better have been aligned! (AHB-lite requires this)
if (hastiAlignment >= 1) {
assert (io.htrans === HTRANS_IDLE || io.htrans === HTRANS_BUSY ||
(io.haddr & mask_decode.asUInt()(hastiAlignment,1)) === UInt(0),
"HASTI request not aligned")
}
// The mask and address during the address phase
val a_request = io.hsel && (io.htrans === HTRANS_NONSEQ || io.htrans === HTRANS_SEQ)
val a_mask = Wire(UInt(width = hastiDataBytes))
val a_address = io.haddr(depth-1, hastiAlignment)
val a_write = io.hwrite
// for backwards compatibility with chisel2, we needed a static width in definition
a_mask := mask_shift(hastiDataBytes-1, 0)
// The data phase signals
val d_read = RegEnable(a_request && !a_write, Bool(false), ready)
val d_mask = RegEnable(a_mask, ready && a_request)
val d_wdata = Vec.tabulate(hastiDataBytes) { i => io.hwdata(8*(i+1)-1, 8*i) }
// AHB writes must occur during the data phase; this poses a structural
// hazard with reads which must occur during the address phase. To solve
// this problem, we delay the writes until there is a free cycle.
//
// The idea is to record the address information from address phase and
// then as soon as possible flush the pending write. This cannot be done
// on a cycle when there is an address phase read, but on any other cycle
// the write will execute. In the case of reads following a write, the
// result must bypass data from the pending write into the read if they
// happen to have a matching address.
// Remove this once HoldUnless is in chisel3
def holdUnless[T <: Data](in : T, enable: Bool): T = Mux(!enable, RegEnable(in, enable), in)
// Pending write?
val p_valid = RegInit(Bool(false))
val p_address = Reg(a_address)
val p_mask = Reg(a_mask)
val p_latch_d = RegNext(ready && a_request && a_write, Bool(false))
val p_wdata = holdUnless(d_wdata, p_latch_d)
// Use single-ported memory with byte-write enable
val mem = SeqMem(1 << (depth-hastiAlignment), Vec(hastiDataBytes, Bits(width = 8)))
// Decide if the SRAM port is used for reading or (potentially) writing
val read = ready && a_request && !a_write
// In case we are stalled, we need to hold the read data
val d_rdata = holdUnless(mem.read(a_address, read), RegNext(read))
// Whenever the port is not needed for reading, execute pending writes
when (!read) {
when (p_valid) { mem.write(p_address, p_wdata, p_mask.toBools) }
p_valid := Bool(false)
}
// Record the request for later?
when (ready && a_request && a_write) {
p_valid := Bool(true)
p_address := a_address
p_mask := a_mask
}
// Does the read need to be muxed with the previous write?
val a_bypass = a_address === p_address && p_valid
val d_bypass = RegEnable(a_bypass, ready && a_request)
// Mux in data from the pending write
val muxdata = Vec((p_mask.toBools zip (p_wdata zip d_rdata))
map { case (m, (p, r)) => Mux(d_bypass && m, p, r) })
// Wipe out any data the master should not see (for testing)
val outdata = Vec((d_mask.toBools zip muxdata)
map { case (m, p) => Mux(d_read && ready && m, p, Bits(0)) })
// Finally, the outputs
io.hrdata := outdata.asUInt
io.hready := ready
io.hresp := HRESP_OKAY
}
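// Illustrative sketch: drive the test SRAM from a NASTI master through the
// converter above, using a single-slave HastiBus to generate hsel. The
// 16-bit depth and `NastiBackedSRAM` name are hypothetical; the usual
// Hasti/Nasti keys are assumed present in p.
class NastiBackedSRAM(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val nasti = (new NastiIO).flip
  }
  val conv = Module(new HastiMasterIONastiIOConverter)
  val bus = Module(new HastiBus(Seq((_: UInt) => Bool(true))))
  val sram = Module(new HastiTestSRAM(16))
  conv.io.nasti <> io.nasti
  bus.io.master <> conv.io.hasti
  bus.io.slaves(0) <> sram.io
}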

View File

@ -0,0 +1,317 @@
// See LICENSE for license details.
package junctions
import Chisel._
import scala.math._
import cde.{Parameters, Field}
case object MIFAddrBits extends Field[Int]
case object MIFDataBits extends Field[Int]
case object MIFTagBits extends Field[Int]
case object MIFDataBeats extends Field[Int]
trait HasMIFParameters {
implicit val p: Parameters
val mifTagBits = p(MIFTagBits)
val mifAddrBits = p(MIFAddrBits)
val mifDataBits = p(MIFDataBits)
val mifDataBeats = p(MIFDataBeats)
}
abstract class MIFModule(implicit val p: Parameters) extends Module with HasMIFParameters
abstract class MIFBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasMIFParameters
trait HasMemData extends HasMIFParameters {
val data = Bits(width = mifDataBits)
}
trait HasMemAddr extends HasMIFParameters {
val addr = UInt(width = mifAddrBits)
}
trait HasMemTag extends HasMIFParameters {
val tag = UInt(width = mifTagBits)
}
class MemReqCmd(implicit p: Parameters) extends MIFBundle()(p) with HasMemAddr with HasMemTag {
val rw = Bool()
}
class MemTag(implicit p: Parameters) extends MIFBundle()(p) with HasMemTag
class MemData(implicit p: Parameters) extends MIFBundle()(p) with HasMemData
class MemResp(implicit p: Parameters) extends MIFBundle()(p) with HasMemData with HasMemTag
class MemIO(implicit p: Parameters) extends ParameterizedBundle()(p) {
val req_cmd = Decoupled(new MemReqCmd)
val req_data = Decoupled(new MemData)
val resp = Decoupled(new MemResp).flip
}
class MemPipeIO(implicit p: Parameters) extends ParameterizedBundle()(p) {
val req_cmd = Decoupled(new MemReqCmd)
val req_data = Decoupled(new MemData)
val resp = Valid(new MemResp).flip
}
class MemSerializedIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) {
val req = Decoupled(Bits(width = w))
val resp = Valid(Bits(width = w)).flip
override def cloneType = new MemSerializedIO(w)(p).asInstanceOf[this.type]
}
class MemSerdes(w: Int)(implicit p: Parameters) extends MIFModule
{
val io = new Bundle {
val wide = new MemIO().flip
val narrow = new MemSerializedIO(w)
}
val abits = io.wide.req_cmd.bits.asUInt.getWidth
val dbits = io.wide.req_data.bits.asUInt.getWidth
val rbits = io.wide.resp.bits.getWidth
val out_buf = Reg(Bits())
val in_buf = Reg(Bits())
val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(UInt(), 5)
val state = Reg(init=s_idle)
val send_cnt = Reg(init=UInt(0, log2Up((max(abits, dbits)+w-1)/w)))
val data_send_cnt = Reg(init=UInt(0, log2Up(mifDataBeats)))
val adone = io.narrow.req.ready && send_cnt === UInt((abits-1)/w)
val ddone = io.narrow.req.ready && send_cnt === UInt((dbits-1)/w)
when (io.narrow.req.valid && io.narrow.req.ready) {
send_cnt := send_cnt + UInt(1)
out_buf := out_buf >> UInt(w)
}
when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) {
out_buf := io.wide.req_cmd.bits.asUInt
}
when (io.wide.req_data.valid && io.wide.req_data.ready) {
out_buf := io.wide.req_data.bits.asUInt
}
io.wide.req_cmd.ready := state === s_idle
io.wide.req_data.ready := state === s_write_idle
io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data
io.narrow.req.bits := out_buf
when (state === s_idle && io.wide.req_cmd.valid) {
state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr)
}
when (state === s_read_addr && adone) {
state := s_idle
send_cnt := UInt(0)
}
when (state === s_write_addr && adone) {
state := s_write_idle
send_cnt := UInt(0)
}
when (state === s_write_idle && io.wide.req_data.valid) {
state := s_write_data
}
when (state === s_write_data && ddone) {
data_send_cnt := data_send_cnt + UInt(1)
state := Mux(data_send_cnt === UInt(mifDataBeats-1), s_idle, s_write_idle)
send_cnt := UInt(0)
}
val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w)))
val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats)))
val resp_val = Reg(init=Bool(false))
resp_val := Bool(false)
when (io.narrow.resp.valid) {
recv_cnt := recv_cnt + UInt(1)
when (recv_cnt === UInt((rbits-1)/w)) {
recv_cnt := UInt(0)
data_recv_cnt := data_recv_cnt + UInt(1)
resp_val := Bool(true)
}
in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+w-1)/w*w-1,w))
}
io.wide.resp.valid := resp_val
io.wide.resp.bits := io.wide.resp.bits.fromBits(in_buf)
}
class MemDesserIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) {
val narrow = new MemSerializedIO(w).flip
val wide = new MemIO
}
class MemDesser(w: Int)(implicit p: Parameters) extends Module // test rig side
{
val io = new MemDesserIO(w)
val abits = io.wide.req_cmd.bits.asUInt.getWidth
val dbits = io.wide.req_data.bits.asUInt.getWidth
val rbits = io.wide.resp.bits.getWidth
val mifDataBeats = p(MIFDataBeats)
require(dbits >= abits && rbits >= dbits)
val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w)))
val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats)))
val adone = io.narrow.req.valid && recv_cnt === UInt((abits-1)/w)
val ddone = io.narrow.req.valid && recv_cnt === UInt((dbits-1)/w)
val rdone = io.narrow.resp.valid && recv_cnt === UInt((rbits-1)/w)
val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(UInt(), 5)
val state = Reg(init=s_cmd_recv)
val in_buf = Reg(Bits())
when (io.narrow.req.valid && io.narrow.req.ready || io.narrow.resp.valid) {
recv_cnt := recv_cnt + UInt(1)
in_buf := Cat(io.narrow.req.bits, in_buf((rbits+w-1)/w*w-1,w))
}
io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv
when (state === s_cmd_recv && adone) {
state := s_cmd
recv_cnt := UInt(0)
}
when (state === s_cmd && io.wide.req_cmd.ready) {
state := Mux(io.wide.req_cmd.bits.rw, s_data_recv, s_reply)
}
when (state === s_data_recv && ddone) {
state := s_data
recv_cnt := UInt(0)
}
when (state === s_data && io.wide.req_data.ready) {
state := s_data_recv
when (data_recv_cnt === UInt(mifDataBeats-1)) {
state := s_cmd_recv
}
data_recv_cnt := data_recv_cnt + UInt(1)
}
when (rdone) { // state === s_reply
when (data_recv_cnt === UInt(mifDataBeats-1)) {
state := s_cmd_recv
}
recv_cnt := UInt(0)
data_recv_cnt := data_recv_cnt + UInt(1)
}
val req_cmd = in_buf >> UInt(((rbits+w-1)/w - (abits+w-1)/w)*w)
io.wide.req_cmd.valid := state === s_cmd
io.wide.req_cmd.bits := io.wide.req_cmd.bits.fromBits(req_cmd)
io.wide.req_data.valid := state === s_data
io.wide.req_data.bits.data := in_buf >> UInt(((rbits+w-1)/w - (dbits+w-1)/w)*w)
val dataq = Module(new Queue(new MemResp, mifDataBeats))
dataq.io.enq <> io.wide.resp
dataq.io.deq.ready := recv_cnt === UInt((rbits-1)/w)
io.narrow.resp.valid := dataq.io.deq.valid
io.narrow.resp.bits := dataq.io.deq.bits.asUInt >> (recv_cnt * UInt(w))
}
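// Illustrative sketch: pair a MemSerdes with a MemDesser to carry MemIO
// over a 16-bit link; responses flow back without backpressure (Valid
// only). `MemOverSerial` is a hypothetical name.
class MemOverSerial(implicit p: Parameters) extends MIFModule {
  val io = new Bundle {
    val cpu = new MemIO().flip
    val mem = new MemIO
  }
  val ser = Module(new MemSerdes(16))
  val des = Module(new MemDesser(16))
  ser.io.wide <> io.cpu
  des.io.narrow <> ser.io.narrow
  io.mem <> des.io.wide
}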
class MemIOArbiter(val arbN: Int)(implicit p: Parameters) extends MIFModule {
val io = new Bundle {
val inner = Vec(arbN, new MemIO).flip
val outer = new MemIO
}
if(arbN > 1) {
val cmd_arb = Module(new RRArbiter(new MemReqCmd, arbN))
val choice_q = Module(new Queue(cmd_arb.io.chosen, 4))
val (data_cnt, data_done) = Counter(io.outer.req_data.fire(), mifDataBeats)
io.inner.map(_.req_cmd).zipWithIndex.zip(cmd_arb.io.in).map{ case ((req, id), arb) => {
arb.valid := req.valid
arb.bits := req.bits
arb.bits.tag := Cat(req.bits.tag, UInt(id))
req.ready := arb.ready
}}
io.outer.req_cmd.bits := cmd_arb.io.out.bits
io.outer.req_cmd.valid := cmd_arb.io.out.valid && choice_q.io.enq.ready
cmd_arb.io.out.ready := io.outer.req_cmd.ready && choice_q.io.enq.ready
choice_q.io.enq.bits := cmd_arb.io.chosen
choice_q.io.enq.valid := cmd_arb.io.out.fire() && cmd_arb.io.out.bits.rw
io.outer.req_data.bits := io.inner(choice_q.io.deq.bits).req_data.bits
io.outer.req_data.valid := io.inner(choice_q.io.deq.bits).req_data.valid && choice_q.io.deq.valid
io.inner.map(_.req_data.ready).zipWithIndex.foreach {
case(r, i) => r := UInt(i) === choice_q.io.deq.bits && choice_q.io.deq.valid
}
choice_q.io.deq.ready := data_done
io.outer.resp.ready := Bool(false)
for (i <- 0 until arbN) {
io.inner(i).resp.valid := Bool(false)
when(io.outer.resp.bits.tag(log2Up(arbN)-1,0) === UInt(i)) {
io.inner(i).resp.valid := io.outer.resp.valid
io.outer.resp.ready := io.inner(i).resp.ready
}
io.inner(i).resp.bits := io.outer.resp.bits
io.inner(i).resp.bits.tag := io.outer.resp.bits.tag >> UInt(log2Up(arbN))
}
} else { io.outer <> io.inner.head }
}
object MemIOMemPipeIOConverter {
def apply(in: MemPipeIO)(implicit p: Parameters): MemIO = {
val out = Wire(new MemIO())
in.resp.valid := out.resp.valid
in.resp.bits := out.resp.bits
out.resp.ready := Bool(true)
out.req_cmd.valid := in.req_cmd.valid
out.req_cmd.bits := in.req_cmd.bits
in.req_cmd.ready := out.req_cmd.ready
out.req_data.valid := in.req_data.valid
out.req_data.bits := in.req_data.bits
in.req_data.ready := out.req_data.ready
out
}
}
class MemPipeIOMemIOConverter(numRequests: Int)(implicit p: Parameters) extends MIFModule {
val io = new Bundle {
val cpu = new MemIO().flip
val mem = new MemPipeIO
}
val numEntries = numRequests * mifDataBeats
val size = log2Down(numEntries) + 1
val inc = Wire(Bool())
val dec = Wire(Bool())
val count = Reg(init=UInt(numEntries, size))
val watermark = count >= UInt(mifDataBeats)
when (inc && !dec) {
count := count + UInt(1)
}
when (!inc && dec) {
count := count - UInt(mifDataBeats)
}
when (inc && dec) {
count := count - UInt(mifDataBeats-1)
}
val cmdq_mask = io.cpu.req_cmd.bits.rw || watermark
io.mem.req_cmd.valid := io.cpu.req_cmd.valid && cmdq_mask
io.cpu.req_cmd.ready := io.mem.req_cmd.ready && cmdq_mask
io.mem.req_cmd.bits := io.cpu.req_cmd.bits
io.mem.req_data <> io.cpu.req_data
// Have separate queues to allow for different mem implementations
val resp_data_q = Module((new HellaQueue(numEntries)) { new MemData })
resp_data_q.io.enq.valid := io.mem.resp.valid
resp_data_q.io.enq.bits.data := io.mem.resp.bits.data
val resp_tag_q = Module((new HellaQueue(numEntries)) { new MemTag })
resp_tag_q.io.enq.valid := io.mem.resp.valid
resp_tag_q.io.enq.bits.tag := io.mem.resp.bits.tag
io.cpu.resp.valid := resp_data_q.io.deq.valid && resp_tag_q.io.deq.valid
io.cpu.resp.bits.data := resp_data_q.io.deq.bits.data
io.cpu.resp.bits.tag := resp_tag_q.io.deq.bits.tag
resp_data_q.io.deq.ready := io.cpu.resp.ready
resp_tag_q.io.deq.ready := io.cpu.resp.ready
inc := resp_data_q.io.deq.fire() && resp_tag_q.io.deq.fire()
dec := io.mem.req_cmd.fire() && !io.mem.req_cmd.bits.rw
}
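// Illustrative usage sketch: share one outer MemIO between two clients;
// the arbiter widens tags by log2Up(2) = 1 bit to route responses back.
// `TwoClientMem` is a hypothetical name.
class TwoClientMem(implicit p: Parameters) extends MIFModule {
  val io = new Bundle {
    val a = new MemIO().flip
    val b = new MemIO().flip
    val mem = new MemIO
  }
  val arb = Module(new MemIOArbiter(2))
  arb.io.inner(0) <> io.a
  arb.io.inner(1) <> io.b
  io.mem <> arb.io.outer
}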

View File

@ -0,0 +1,737 @@
/// See LICENSE for license details.
package junctions
import Chisel._
import scala.math.max
import scala.collection.mutable.ArraySeq
import cde.{Parameters, Field}
case object NastiKey extends Field[NastiParameters]
case class NastiParameters(dataBits: Int, addrBits: Int, idBits: Int)
trait HasNastiParameters {
implicit val p: Parameters
val nastiExternal = p(NastiKey)
val nastiXDataBits = nastiExternal.dataBits
val nastiWStrobeBits = nastiXDataBits / 8
val nastiXAddrBits = nastiExternal.addrBits
val nastiWIdBits = nastiExternal.idBits
val nastiRIdBits = nastiExternal.idBits
val nastiXIdBits = max(nastiWIdBits, nastiRIdBits)
val nastiXUserBits = 1
val nastiAWUserBits = nastiXUserBits
val nastiWUserBits = nastiXUserBits
val nastiBUserBits = nastiXUserBits
val nastiARUserBits = nastiXUserBits
val nastiRUserBits = nastiXUserBits
val nastiXLenBits = 8
val nastiXSizeBits = 3
val nastiXBurstBits = 2
val nastiXCacheBits = 4
val nastiXProtBits = 3
val nastiXQosBits = 4
val nastiXRegionBits = 4
val nastiXRespBits = 2
def bytesToXSize(bytes: UInt) = MuxLookup(bytes, UInt("b111"), Array(
UInt(1) -> UInt(0),
UInt(2) -> UInt(1),
UInt(4) -> UInt(2),
UInt(8) -> UInt(3),
UInt(16) -> UInt(4),
UInt(32) -> UInt(5),
UInt(64) -> UInt(6),
UInt(128) -> UInt(7)))
}
abstract class NastiModule(implicit val p: Parameters) extends Module
with HasNastiParameters
abstract class NastiBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasNastiParameters
abstract class NastiChannel(implicit p: Parameters) extends NastiBundle()(p)
abstract class NastiMasterToSlaveChannel(implicit p: Parameters) extends NastiChannel()(p)
abstract class NastiSlaveToMasterChannel(implicit p: Parameters) extends NastiChannel()(p)
trait HasNastiMetadata extends HasNastiParameters {
val addr = UInt(width = nastiXAddrBits)
val len = UInt(width = nastiXLenBits)
val size = UInt(width = nastiXSizeBits)
val burst = UInt(width = nastiXBurstBits)
val lock = Bool()
val cache = UInt(width = nastiXCacheBits)
val prot = UInt(width = nastiXProtBits)
val qos = UInt(width = nastiXQosBits)
val region = UInt(width = nastiXRegionBits)
}
trait HasNastiData extends HasNastiParameters {
val data = UInt(width = nastiXDataBits)
val last = Bool()
}
class NastiReadIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
val ar = Decoupled(new NastiReadAddressChannel)
val r = Decoupled(new NastiReadDataChannel).flip
}
class NastiWriteIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
val aw = Decoupled(new NastiWriteAddressChannel)
val w = Decoupled(new NastiWriteDataChannel)
val b = Decoupled(new NastiWriteResponseChannel).flip
}
class NastiIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
val aw = Decoupled(new NastiWriteAddressChannel)
val w = Decoupled(new NastiWriteDataChannel)
val b = Decoupled(new NastiWriteResponseChannel).flip
val ar = Decoupled(new NastiReadAddressChannel)
val r = Decoupled(new NastiReadDataChannel).flip
}
class NastiAddressChannel(implicit p: Parameters) extends NastiMasterToSlaveChannel()(p)
with HasNastiMetadata
class NastiResponseChannel(implicit p: Parameters) extends NastiSlaveToMasterChannel()(p) {
val resp = UInt(width = nastiXRespBits)
}
class NastiWriteAddressChannel(implicit p: Parameters) extends NastiAddressChannel()(p) {
val id = UInt(width = nastiWIdBits)
val user = UInt(width = nastiAWUserBits)
}
class NastiWriteDataChannel(implicit p: Parameters) extends NastiMasterToSlaveChannel()(p)
with HasNastiData {
val id = UInt(width = nastiWIdBits)
val strb = UInt(width = nastiWStrobeBits)
val user = UInt(width = nastiWUserBits)
}
class NastiWriteResponseChannel(implicit p: Parameters) extends NastiResponseChannel()(p) {
val id = UInt(width = nastiWIdBits)
val user = UInt(width = nastiBUserBits)
}
class NastiReadAddressChannel(implicit p: Parameters) extends NastiAddressChannel()(p) {
val id = UInt(width = nastiRIdBits)
val user = UInt(width = nastiARUserBits)
}
class NastiReadDataChannel(implicit p: Parameters) extends NastiResponseChannel()(p)
with HasNastiData {
val id = UInt(width = nastiRIdBits)
val user = UInt(width = nastiRUserBits)
}
object NastiConstants {
val BURST_FIXED = UInt("b00")
val BURST_INCR = UInt("b01")
val BURST_WRAP = UInt("b10")
val RESP_OKAY = UInt("b00")
val RESP_EXOKAY = UInt("b01")
val RESP_SLVERR = UInt("b10")
val RESP_DECERR = UInt("b11")
}
import NastiConstants._
object NastiWriteAddressChannel {
def apply(id: UInt, addr: UInt, size: UInt,
len: UInt = UInt(0), burst: UInt = BURST_INCR)
(implicit p: Parameters) = {
val aw = Wire(new NastiWriteAddressChannel)
aw.id := id
aw.addr := addr
aw.len := len
aw.size := size
aw.burst := burst
aw.lock := Bool(false)
aw.cache := UInt("b0000")
aw.prot := UInt("b000")
aw.qos := UInt("b0000")
aw.region := UInt("b0000")
aw.user := UInt(0)
aw
}
}
object NastiReadAddressChannel {
def apply(id: UInt, addr: UInt, size: UInt,
len: UInt = UInt(0), burst: UInt = BURST_INCR)
(implicit p: Parameters) = {
val ar = Wire(new NastiReadAddressChannel)
ar.id := id
ar.addr := addr
ar.len := len
ar.size := size
ar.burst := burst
ar.lock := Bool(false)
ar.cache := UInt(0)
ar.prot := UInt(0)
ar.qos := UInt(0)
ar.region := UInt(0)
ar.user := UInt(0)
ar
}
}
object NastiWriteDataChannel {
def apply(data: UInt, strb: Option[UInt] = None,
last: Bool = Bool(true), id: UInt = UInt(0))
(implicit p: Parameters): NastiWriteDataChannel = {
val w = Wire(new NastiWriteDataChannel)
w.strb := strb.getOrElse(Fill(w.nastiWStrobeBits, UInt(1, 1)))
w.data := data
w.last := last
w.id := id
w.user := UInt(0)
w
}
}
object NastiReadDataChannel {
def apply(id: UInt, data: UInt, last: Bool = Bool(true), resp: UInt = UInt(0))(
implicit p: Parameters) = {
val r = Wire(new NastiReadDataChannel)
r.id := id
r.data := data
r.last := last
r.resp := resp
r.user := UInt(0)
r
}
}
object NastiWriteResponseChannel {
def apply(id: UInt, resp: UInt = UInt(0))(implicit p: Parameters) = {
val b = Wire(new NastiWriteResponseChannel)
b.id := id
b.resp := resp
b.user := UInt(0)
b
}
}
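// Illustrative only: a minimal sketch of a master driving a single-beat
// write through the channel factories above. The module name and the fixed
// address/data are hypothetical; a real master would sequence the
// valid/ready handshakes through a state machine rather than tying valid high.
class NastiWriteExample(implicit p: Parameters) extends NastiModule {
  val io = new Bundle { val nasti = new NastiIO }
  io.nasti.aw.valid := Bool(true)
  io.nasti.aw.bits := NastiWriteAddressChannel(
    id = UInt(0), addr = UInt(0x1000), size = UInt(3)) // one 8-byte beat
  io.nasti.w.valid := Bool(true)
  io.nasti.w.bits := NastiWriteDataChannel(data = UInt(0xdeadbeefL))
  io.nasti.b.ready := Bool(true)
  // Tie off the unused read channels.
  io.nasti.ar.valid := Bool(false)
  io.nasti.ar.bits := NastiReadAddressChannel(
    id = UInt(0), addr = UInt(0), size = UInt(3))
  io.nasti.r.ready := Bool(false)
}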
class MemIONastiIOConverter(cacheBlockOffsetBits: Int)(implicit p: Parameters) extends MIFModule
with HasNastiParameters {
val io = new Bundle {
val nasti = (new NastiIO).flip
val mem = new MemIO
}
require(mifDataBits == nastiXDataBits, "Data sizes between LLC and MC don't agree")
val (mif_cnt_out, mif_wrap_out) = Counter(io.mem.resp.fire(), mifDataBeats)
assert(!io.nasti.aw.valid || io.nasti.aw.bits.size === UInt(log2Up(mifDataBits/8)),
"Nasti data size does not match MemIO data size")
assert(!io.nasti.ar.valid || io.nasti.ar.bits.size === UInt(log2Up(mifDataBits/8)),
"Nasti data size does not match MemIO data size")
assert(!io.nasti.aw.valid || io.nasti.aw.bits.len === UInt(mifDataBeats - 1),
"Nasti length does not match number of MemIO beats")
assert(!io.nasti.ar.valid || io.nasti.ar.bits.len === UInt(mifDataBeats - 1),
"Nasti length does not match number of MemIO beats")
// according to the spec, we can't send b until the last transfer on w
val b_ok = Reg(init = Bool(true))
when (io.nasti.aw.fire()) { b_ok := Bool(false) }
when (io.nasti.w.fire() && io.nasti.w.bits.last) { b_ok := Bool(true) }
val id_q = Module(new Queue(UInt(width = nastiWIdBits), 2))
id_q.io.enq.valid := io.nasti.aw.valid && io.mem.req_cmd.ready
id_q.io.enq.bits := io.nasti.aw.bits.id
id_q.io.deq.ready := io.nasti.b.ready && b_ok
io.mem.req_cmd.bits.addr := Mux(io.nasti.aw.valid, io.nasti.aw.bits.addr, io.nasti.ar.bits.addr) >>
UInt(cacheBlockOffsetBits)
io.mem.req_cmd.bits.tag := Mux(io.nasti.aw.valid, io.nasti.aw.bits.id, io.nasti.ar.bits.id)
io.mem.req_cmd.bits.rw := io.nasti.aw.valid
io.mem.req_cmd.valid := (io.nasti.aw.valid && id_q.io.enq.ready) || io.nasti.ar.valid
io.nasti.ar.ready := io.mem.req_cmd.ready && !io.nasti.aw.valid
io.nasti.aw.ready := io.mem.req_cmd.ready && id_q.io.enq.ready
io.nasti.b.valid := id_q.io.deq.valid && b_ok
io.nasti.b.bits.id := id_q.io.deq.bits
io.nasti.b.bits.resp := UInt(0)
io.nasti.w.ready := io.mem.req_data.ready
io.mem.req_data.valid := io.nasti.w.valid
io.mem.req_data.bits.data := io.nasti.w.bits.data
assert(!io.nasti.w.valid || io.nasti.w.bits.strb.andR, "MemIO must write full cache line")
io.nasti.r.valid := io.mem.resp.valid
io.nasti.r.bits.data := io.mem.resp.bits.data
io.nasti.r.bits.last := mif_wrap_out
io.nasti.r.bits.id := io.mem.resp.bits.tag
io.nasti.r.bits.resp := UInt(0)
io.mem.resp.ready := io.nasti.r.ready
}
class NastiArbiterIO(arbN: Int)(implicit p: Parameters) extends Bundle {
val master = Vec(arbN, new NastiIO).flip
val slave = new NastiIO
override def cloneType =
new NastiArbiterIO(arbN).asInstanceOf[this.type]
}
/** Arbitrate among arbN masters requesting access to a single slave */
class NastiArbiter(val arbN: Int)(implicit p: Parameters) extends NastiModule {
val io = new NastiArbiterIO(arbN)
if (arbN > 1) {
val arbIdBits = log2Up(arbN)
val ar_arb = Module(new RRArbiter(new NastiReadAddressChannel, arbN))
val aw_arb = Module(new RRArbiter(new NastiWriteAddressChannel, arbN))
val slave_r_arb_id = io.slave.r.bits.id(arbIdBits - 1, 0)
val slave_b_arb_id = io.slave.b.bits.id(arbIdBits - 1, 0)
val w_chosen = Reg(UInt(width = arbIdBits))
val w_done = Reg(init = Bool(true))
when (aw_arb.io.out.fire()) {
w_chosen := aw_arb.io.chosen
w_done := Bool(false)
}
when (io.slave.w.fire() && io.slave.w.bits.last) {
w_done := Bool(true)
}
for (i <- 0 until arbN) {
val m_ar = io.master(i).ar
val m_aw = io.master(i).aw
val m_r = io.master(i).r
val m_b = io.master(i).b
val a_ar = ar_arb.io.in(i)
val a_aw = aw_arb.io.in(i)
val m_w = io.master(i).w
a_ar <> m_ar
a_ar.bits.id := Cat(m_ar.bits.id, UInt(i, arbIdBits))
a_aw <> m_aw
a_aw.bits.id := Cat(m_aw.bits.id, UInt(i, arbIdBits))
m_r.valid := io.slave.r.valid && slave_r_arb_id === UInt(i)
m_r.bits := io.slave.r.bits
m_r.bits.id := io.slave.r.bits.id >> UInt(arbIdBits)
m_b.valid := io.slave.b.valid && slave_b_arb_id === UInt(i)
m_b.bits := io.slave.b.bits
m_b.bits.id := io.slave.b.bits.id >> UInt(arbIdBits)
m_w.ready := io.slave.w.ready && w_chosen === UInt(i) && !w_done
}
io.slave.r.ready := io.master(slave_r_arb_id).r.ready
io.slave.b.ready := io.master(slave_b_arb_id).b.ready
io.slave.w.bits := io.master(w_chosen).w.bits
io.slave.w.valid := io.master(w_chosen).w.valid && !w_done
io.slave.ar <> ar_arb.io.out
io.slave.aw.bits <> aw_arb.io.out.bits
io.slave.aw.valid := aw_arb.io.out.valid && w_done
aw_arb.io.out.ready := io.slave.aw.ready && w_done
} else { io.slave <> io.master.head }
}
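// Illustrative only: a minimal sketch of funneling two masters into a single
// slave port through the arbiter; the module and port names are hypothetical.
class TwoMasterArbExample(implicit p: Parameters) extends NastiModule {
  val io = new Bundle {
    val masters = Vec(2, new NastiIO).flip
    val slave = new NastiIO
  }
  val arb = Module(new NastiArbiter(2))
  arb.io.master <> io.masters
  io.slave <> arb.io.slave
}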
/** A slave that sends a decode error for every request it receives */
class NastiErrorSlave(implicit p: Parameters) extends NastiModule {
val io = (new NastiIO).flip
when (io.ar.fire()) { printf("Invalid read address %x\n", io.ar.bits.addr) }
when (io.aw.fire()) { printf("Invalid write address %x\n", io.aw.bits.addr) }
val r_queue = Module(new Queue(new NastiReadAddressChannel, 1))
r_queue.io.enq <> io.ar
val responding = Reg(init = Bool(false))
val beats_left = Reg(init = UInt(0, nastiXLenBits))
when (!responding && r_queue.io.deq.valid) {
responding := Bool(true)
beats_left := r_queue.io.deq.bits.len
}
io.r.valid := r_queue.io.deq.valid && responding
io.r.bits.id := r_queue.io.deq.bits.id
io.r.bits.data := UInt(0)
io.r.bits.resp := RESP_DECERR
io.r.bits.last := beats_left === UInt(0)
r_queue.io.deq.ready := io.r.fire() && io.r.bits.last
when (io.r.fire()) {
when (beats_left === UInt(0)) {
responding := Bool(false)
} .otherwise {
beats_left := beats_left - UInt(1)
}
}
val draining = Reg(init = Bool(false))
io.w.ready := draining
when (io.aw.fire()) { draining := Bool(true) }
when (io.w.fire() && io.w.bits.last) { draining := Bool(false) }
val b_queue = Module(new Queue(UInt(width = nastiWIdBits), 1))
b_queue.io.enq.valid := io.aw.valid && !draining
b_queue.io.enq.bits := io.aw.bits.id
io.aw.ready := b_queue.io.enq.ready && !draining
io.b.valid := b_queue.io.deq.valid && !draining
io.b.bits.id := b_queue.io.deq.bits
io.b.bits.resp := RESP_DECERR
b_queue.io.deq.ready := io.b.ready && !draining
}
class NastiRouterIO(nSlaves: Int)(implicit p: Parameters) extends Bundle {
val master = (new NastiIO).flip
val slave = Vec(nSlaves, new NastiIO)
override def cloneType =
new NastiRouterIO(nSlaves).asInstanceOf[this.type]
}
/** Take a single Nasti master and route its requests to various slaves
* @param nSlaves the number of slaves
* @param routeSel a function which takes an address and produces
* a one-hot encoded selection of the slave to write to */
class NastiRouter(nSlaves: Int, routeSel: UInt => UInt)(implicit p: Parameters)
extends NastiModule {
val io = new NastiRouterIO(nSlaves)
val ar_route = routeSel(io.master.ar.bits.addr)
val aw_route = routeSel(io.master.aw.bits.addr)
var ar_ready = Bool(false)
var aw_ready = Bool(false)
var w_ready = Bool(false)
io.slave.zipWithIndex.foreach { case (s, i) =>
s.ar.valid := io.master.ar.valid && ar_route(i)
s.ar.bits := io.master.ar.bits
ar_ready = ar_ready || (s.ar.ready && ar_route(i))
s.aw.valid := io.master.aw.valid && aw_route(i)
s.aw.bits := io.master.aw.bits
aw_ready = aw_ready || (s.aw.ready && aw_route(i))
val chosen = Reg(init = Bool(false))
when (s.w.fire() && s.w.bits.last) { chosen := Bool(false) }
when (s.aw.fire()) { chosen := Bool(true) }
s.w.valid := io.master.w.valid && chosen
s.w.bits := io.master.w.bits
w_ready = w_ready || (s.w.ready && chosen)
}
val r_invalid = !ar_route.orR
val w_invalid = !aw_route.orR
val err_slave = Module(new NastiErrorSlave)
err_slave.io.ar.valid := r_invalid && io.master.ar.valid
err_slave.io.ar.bits := io.master.ar.bits
err_slave.io.aw.valid := w_invalid && io.master.aw.valid
err_slave.io.aw.bits := io.master.aw.bits
err_slave.io.w.valid := io.master.w.valid
err_slave.io.w.bits := io.master.w.bits
io.master.ar.ready := ar_ready || (r_invalid && err_slave.io.ar.ready)
io.master.aw.ready := aw_ready || (w_invalid && err_slave.io.aw.ready)
io.master.w.ready := w_ready || err_slave.io.w.ready
val b_arb = Module(new RRArbiter(new NastiWriteResponseChannel, nSlaves + 1))
val r_arb = Module(new JunctionsPeekingArbiter(
new NastiReadDataChannel, nSlaves + 1,
// we can unlock if it's the last beat
(r: NastiReadDataChannel) => r.last))
for (i <- 0 until nSlaves) {
b_arb.io.in(i) <> io.slave(i).b
r_arb.io.in(i) <> io.slave(i).r
}
b_arb.io.in(nSlaves) <> err_slave.io.b
r_arb.io.in(nSlaves) <> err_slave.io.r
io.master.b <> b_arb.io.out
io.master.r <> r_arb.io.out
}
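// Illustrative only: a minimal sketch of a two-slave router. The address map
// (slave 0 below 0x1000, slave 1 at or above it) is hypothetical; routeSel
// just has to return a one-hot selection, with bit i selecting slave i.
class TwoSlaveRouterExample(implicit p: Parameters) extends NastiModule {
  val io = new Bundle {
    val master = (new NastiIO).flip
    val slaves = Vec(2, new NastiIO)
  }
  val routeSel = (addr: UInt) =>
    Cat(addr >= UInt(0x1000), addr < UInt(0x1000))
  val router = Module(new NastiRouter(2, routeSel))
  router.io.master <> io.master
  io.slaves <> router.io.slave
}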
/** Crossbar between multiple Nasti masters and slaves
* @param nMasters the number of Nasti masters
* @param nSlaves the number of Nasti slaves
* @param routeSel a function selecting the slave to route an address to */
class NastiCrossbar(nMasters: Int, nSlaves: Int, routeSel: UInt => UInt)
(implicit p: Parameters) extends NastiModule {
val io = new Bundle {
val masters = Vec(nMasters, new NastiIO).flip
val slaves = Vec(nSlaves, new NastiIO)
}
if (nMasters == 1) {
val router = Module(new NastiRouter(nSlaves, routeSel))
router.io.master <> io.masters.head
io.slaves <> router.io.slave
} else {
val routers = Vec.fill(nMasters) { Module(new NastiRouter(nSlaves, routeSel)).io }
val arbiters = Vec.fill(nSlaves) { Module(new NastiArbiter(nMasters)).io }
for (i <- 0 until nMasters) {
routers(i).master <> io.masters(i)
}
for (i <- 0 until nSlaves) {
arbiters(i).master <> Vec(routers.map(r => r.slave(i)))
io.slaves(i) <> arbiters(i).slave
}
}
}
class NastiInterconnectIO(val nMasters: Int, val nSlaves: Int)
(implicit p: Parameters) extends Bundle {
/* The interconnect acts as a slave to the masters and as a master to the
 * slaves, which is why the port directions below appear reversed. */
val masters = Vec(nMasters, new NastiIO).flip
val slaves = Vec(nSlaves, new NastiIO)
override def cloneType =
new NastiInterconnectIO(nMasters, nSlaves).asInstanceOf[this.type]
}
abstract class NastiInterconnect(implicit p: Parameters) extends NastiModule()(p) {
val nMasters: Int
val nSlaves: Int
lazy val io = new NastiInterconnectIO(nMasters, nSlaves)
}
class NastiRecursiveInterconnect(val nMasters: Int, addrMap: AddrMap)
(implicit p: Parameters) extends NastiInterconnect()(p) {
def port(name: String) = io.slaves(addrMap.port(name))
val nSlaves = addrMap.numSlaves
val routeSel = (addr: UInt) =>
Cat(addrMap.entries.map(e => addrMap(e.name).containsAddress(addr)).reverse)
val xbar = Module(new NastiCrossbar(nMasters, addrMap.length, routeSel))
xbar.io.masters <> io.masters
io.slaves <> addrMap.entries.zip(xbar.io.slaves).flatMap {
case (entry, xbarSlave) => {
entry.region match {
case submap: AddrMap if submap.entries.isEmpty =>
val err_slave = Module(new NastiErrorSlave)
err_slave.io <> xbarSlave
None
case submap: AddrMap =>
val ic = Module(new NastiRecursiveInterconnect(1, submap))
ic.io.masters.head <> xbarSlave
ic.io.slaves
case r: MemRange =>
Some(xbarSlave)
}
}
}
}
class ChannelHelper(nChannels: Int)
(implicit val p: Parameters) extends HasNastiParameters {
val dataBytes = p(MIFDataBits) * p(MIFDataBeats) / 8
val chanSelBits = log2Ceil(nChannels)
val selOffset = log2Up(dataBytes)
val blockOffset = selOffset + chanSelBits
def getSelect(addr: UInt) =
if (nChannels > 1) addr(blockOffset - 1, selOffset) else UInt(0)
def getAddr(addr: UInt) =
if (nChannels > 1)
Cat(addr(nastiXAddrBits - 1, blockOffset), addr(selOffset - 1, 0))
else addr
}
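// Worked example (illustrative): with nChannels = 2 and a 64-byte block
// (dataBytes = 64), selOffset = 6 and chanSelBits = 1, so getSelect picks
// address bit 6 as the channel index and getAddr splices that bit out,
// leaving a dense per-channel address. Consecutive 64-byte blocks therefore
// alternate between the two channels.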
class NastiMemoryInterconnect(
nBanksPerChannel: Int, nChannels: Int)
(implicit p: Parameters) extends NastiInterconnect()(p) {
val nBanks = nBanksPerChannel * nChannels
val nMasters = nBanks
val nSlaves = nChannels
val chanHelper = new ChannelHelper(nChannels)
def connectChannel(outer: NastiIO, inner: NastiIO) {
outer <> inner
outer.ar.bits.addr := chanHelper.getAddr(inner.ar.bits.addr)
outer.aw.bits.addr := chanHelper.getAddr(inner.aw.bits.addr)
}
for (i <- 0 until nChannels) {
/* Bank assignments to channels are strided so that consecutive banks
* map to different channels. That way, consecutive cache lines also
* map to different channels */
val banks = (i until nBanks by nChannels).map(j => io.masters(j))
val channelArb = Module(new NastiArbiter(nBanksPerChannel))
channelArb.io.master <> banks
connectChannel(io.slaves(i), channelArb.io.slave)
}
}
/** Allows users to switch between various memory configurations. Note that
* this is a dangerous operation: not only does switching the select input to
* this module violate Nasti, it also causes the memory of the machine to
* become garbled. It's expected that select only changes at boot time, as
* part of the memory controller configuration. */
class NastiMemorySelectorIO(val nBanks: Int, val maxMemChannels: Int, nConfigs: Int)
(implicit p: Parameters)
extends NastiInterconnectIO(nBanks, maxMemChannels) {
val select = UInt(INPUT, width = log2Up(nConfigs))
override def cloneType =
new NastiMemorySelectorIO(nMasters, nSlaves, nConfigs).asInstanceOf[this.type]
}
class NastiMemorySelector(nBanks: Int, maxMemChannels: Int, configs: Seq[Int])
(implicit p: Parameters)
extends NastiInterconnect()(p) {
val nMasters = nBanks
val nSlaves = maxMemChannels
val nConfigs = configs.size
override lazy val io = new NastiMemorySelectorIO(nBanks, maxMemChannels, nConfigs)
def muxOnSelect(up: DecoupledIO[Bundle], dn: DecoupledIO[Bundle], active: Bool): Unit = {
when (active) { dn.bits := up.bits }
when (active) { up.ready := dn.ready }
when (active) { dn.valid := up.valid }
}
def muxOnSelect(up: NastiIO, dn: NastiIO, active: Bool): Unit = {
muxOnSelect(up.aw, dn.aw, active)
muxOnSelect(up.w, dn.w, active)
muxOnSelect(dn.b, up.b, active)
muxOnSelect(up.ar, dn.ar, active)
muxOnSelect(dn.r, up.r, active)
}
def muxOnSelect(up: Vec[NastiIO], dn: Vec[NastiIO], active: Bool) : Unit = {
for (i <- 0 until up.size)
muxOnSelect(up(i), dn(i), active)
}
/* Disconnects a vector of Nasti ports by setting their valids and readies
 * to false. Chisel requires every input to be driven, so the bits are also
 * tied to zero. */
def disconnectSlave(slave: Vec[NastiIO]) = {
slave.foreach{ m =>
m.aw.valid := Bool(false)
m.aw.bits := m.aw.bits.fromBits( UInt(0) )
m.w.valid := Bool(false)
m.w.bits := m.w.bits.fromBits( UInt(0) )
m.b.ready := Bool(false)
m.ar.valid := Bool(false)
m.ar.bits := m.ar.bits.fromBits( UInt(0) )
m.r.ready := Bool(false)
}
}
def disconnectMaster(master: Vec[NastiIO]) = {
master.foreach{ m =>
m.aw.ready := Bool(false)
m.w.ready := Bool(false)
m.b.valid := Bool(false)
m.b.bits := m.b.bits.fromBits( UInt(0) )
m.ar.ready := Bool(false)
m.r.valid := Bool(false)
m.r.bits := m.r.bits.fromBits( UInt(0) )
}
}
/* Provides default wires on all our outputs. */
disconnectMaster(io.masters)
disconnectSlave(io.slaves)
/* Constructs interconnects for each of the layouts suggested by the
* configuration and switches between them based on the select input. */
configs.zipWithIndex.foreach{ case (nChannels, select) =>
val nBanksPerChannel = nBanks / nChannels
val ic = Module(new NastiMemoryInterconnect(nBanksPerChannel, nChannels))
disconnectMaster(ic.io.slaves)
disconnectSlave(ic.io.masters)
muxOnSelect( io.masters, ic.io.masters, io.select === UInt(select))
muxOnSelect(ic.io.slaves, io.slaves, io.select === UInt(select))
}
}
class NastiMemoryDemux(nRoutes: Int)(implicit p: Parameters) extends NastiModule()(p) {
val io = new Bundle {
val master = (new NastiIO).flip
val slaves = Vec(nRoutes, new NastiIO)
val select = UInt(INPUT, log2Up(nRoutes))
}
def connectReqChannel[T <: Data](idx: Int, out: DecoupledIO[T], in: DecoupledIO[T]) {
out.valid := in.valid && io.select === UInt(idx)
out.bits := in.bits
when (io.select === UInt(idx)) { in.ready := out.ready }
}
def connectRespChannel[T <: Data](idx: Int, out: DecoupledIO[T], in: DecoupledIO[T]) {
when (io.select === UInt(idx)) { out.valid := in.valid }
when (io.select === UInt(idx)) { out.bits := in.bits }
in.ready := out.ready && io.select === UInt(idx)
}
io.master.ar.ready := Bool(false)
io.master.aw.ready := Bool(false)
io.master.w.ready := Bool(false)
io.master.r.valid := Bool(false)
io.master.r.bits := NastiReadDataChannel(id = UInt(0), data = UInt(0))
io.master.b.valid := Bool(false)
io.master.b.bits := NastiWriteResponseChannel(id = UInt(0))
io.slaves.zipWithIndex.foreach { case (slave, i) =>
connectReqChannel(i, slave.ar, io.master.ar)
connectReqChannel(i, slave.aw, io.master.aw)
connectReqChannel(i, slave.w, io.master.w)
connectRespChannel(i, io.master.r, slave.r)
connectRespChannel(i, io.master.b, slave.b)
}
}
object AsyncNastiTo {
// source(master) is in our clock domain, output is in the 'to' clock domain
def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2)(implicit p: Parameters): NastiIO = {
val sink = Wire(new NastiIO)
sink.aw <> AsyncDecoupledTo(to_clock, to_reset, source.aw, depth, sync)
sink.ar <> AsyncDecoupledTo(to_clock, to_reset, source.ar, depth, sync)
sink.w <> AsyncDecoupledTo(to_clock, to_reset, source.w, depth, sync)
source.b <> AsyncDecoupledFrom(to_clock, to_reset, sink.b, depth, sync)
source.r <> AsyncDecoupledFrom(to_clock, to_reset, sink.r, depth, sync)
sink
}
}
object AsyncNastiFrom {
// source(master) is in the 'from' clock domain, output is in our clock domain
def apply[T <: Data](from_clock: Clock, from_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2)(implicit p: Parameters): NastiIO = {
val sink = Wire(new NastiIO)
sink.aw <> AsyncDecoupledFrom(from_clock, from_reset, source.aw, depth, sync)
sink.ar <> AsyncDecoupledFrom(from_clock, from_reset, source.ar, depth, sync)
sink.w <> AsyncDecoupledFrom(from_clock, from_reset, source.w, depth, sync)
source.b <> AsyncDecoupledTo(from_clock, from_reset, sink.b, depth, sync)
source.r <> AsyncDecoupledTo(from_clock, from_reset, sink.r, depth, sync)
sink
}
}
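// Illustrative only: sketches of crossing a NastiIO between clock domains.
// ioClock, ioReset, coreMaster, and ioSideMaster are hypothetical signals
// from the surrounding design, not part of this library.
//
//   // Move a master in our clock domain out to the io clock domain:
//   val ioSideNasti = AsyncNastiTo(ioClock, ioReset, coreMaster)
//   // Bring a master from the io clock domain into ours:
//   val coreSideNasti = AsyncNastiFrom(ioClock, ioReset, ioSideMaster)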

View File

@ -0,0 +1 @@
package object junctions

View File

@ -0,0 +1,82 @@
package junctions
import Chisel._
import cde.{Parameters, Field}
class PociIO(implicit p: Parameters) extends HastiBundle()(p)
{
val paddr = UInt(OUTPUT, hastiAddrBits)
val pwrite = Bool(OUTPUT)
val psel = Bool(OUTPUT)
val penable = Bool(OUTPUT)
val pwdata = UInt(OUTPUT, hastiDataBits)
val prdata = UInt(INPUT, hastiDataBits)
val pready = Bool(INPUT)
val pslverr = Bool(INPUT)
}
class HastiToPociBridge(implicit p: Parameters) extends HastiModule()(p) {
val io = new Bundle {
val in = new HastiSlaveIO
val out = new PociIO
}
val s_idle :: s_setup :: s_access :: Nil = Enum(UInt(), 3)
val state = Reg(init = s_idle)
val transfer = io.in.hsel & io.in.htrans(1)
switch (state) {
is (s_idle) {
when (transfer) { state := s_setup }
}
is (s_setup) {
state := s_access
}
is (s_access) {
when (io.out.pready & ~transfer) { state := s_idle }
when (io.out.pready & transfer) { state := s_setup }
when (~io.out.pready) { state := s_access }
}
}
val haddr_reg = Reg(UInt(width = hastiAddrBits))
val hwrite_reg = Reg(UInt(width = 1))
when (transfer) {
haddr_reg := io.in.haddr
hwrite_reg := io.in.hwrite
}
io.out.paddr := haddr_reg
io.out.pwrite := hwrite_reg(0)
io.out.psel := (state =/= s_idle)
io.out.penable := (state === s_access)
io.out.pwdata := io.in.hwdata
io.in.hrdata := io.out.prdata
io.in.hready := ((state === s_access) & io.out.pready) | (state === s_idle)
io.in.hresp := io.out.pslverr
}
class PociBus(amap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p)
{
val io = new Bundle {
val master = new PociIO().flip
val slaves = Vec(amap.size, new PociIO)
}
val psels = PriorityEncoderOH(
(io.slaves zip amap) map { case (s, afn) => {
s.paddr := io.master.paddr
s.pwrite := io.master.pwrite
s.pwdata := io.master.pwdata
afn(io.master.paddr) && io.master.psel
}})
(io.slaves zip psels) foreach { case (s, psel) => {
s.psel := psel
s.penable := io.master.penable && psel
} }
io.master.prdata := Mux1H(psels, io.slaves.map(_.prdata))
io.master.pready := Mux1H(psels, io.slaves.map(_.pready))
io.master.pslverr := Mux1H(psels, io.slaves.map(_.pslverr))
}

View File

@ -0,0 +1,70 @@
// See LICENSE for license details.
package junctions
import Chisel._
class SlowIO[T <: Data](val divisor_max: Int)(data: => T) extends Module
{
val io = new Bundle {
val out_fast = Decoupled(data).flip
val out_slow = Decoupled(data)
val in_fast = Decoupled(data)
val in_slow = Decoupled(data).flip
val clk_slow = Bool(OUTPUT)
val set_divisor = Valid(Bits(width = 32)).flip
val divisor = Bits(OUTPUT, 32)
}
require(divisor_max >= 8 && divisor_max <= 65536 && isPow2(divisor_max))
val divisor = Reg(init=UInt(divisor_max-1))
val d_shadow = Reg(init=UInt(divisor_max-1))
val hold = Reg(init=UInt(divisor_max/4-1))
val h_shadow = Reg(init=UInt(divisor_max/4-1))
when (io.set_divisor.valid) {
d_shadow := io.set_divisor.bits(log2Up(divisor_max)-1, 0)
h_shadow := io.set_divisor.bits(log2Up(divisor_max)-1+16, 16)
}
io.divisor := (hold << 16) | divisor
val count = Reg{UInt(width = log2Up(divisor_max))}
val myclock = Reg{Bool()}
count := count + UInt(1)
val rising = count === (divisor >> 1)
val falling = count === divisor
val held = count === (divisor >> 1) + hold
when (falling) {
divisor := d_shadow
hold := h_shadow
count := UInt(0)
myclock := Bool(false)
}
when (rising) {
myclock := Bool(true)
}
val in_slow_rdy = Reg(init=Bool(false))
val out_slow_val = Reg(init=Bool(false))
val out_slow_bits = Reg(data)
val fromhost_q = Module(new Queue(data,1))
fromhost_q.io.enq.valid := rising && (io.in_slow.valid && in_slow_rdy || this.reset)
fromhost_q.io.enq.bits := io.in_slow.bits
io.in_fast <> fromhost_q.io.deq
val tohost_q = Module(new Queue(data,1))
tohost_q.io.enq <> io.out_fast
tohost_q.io.deq.ready := rising && io.out_slow.ready && out_slow_val
when (held) {
in_slow_rdy := fromhost_q.io.enq.ready
out_slow_val := tohost_q.io.deq.valid
out_slow_bits := Mux(this.reset, fromhost_q.io.deq.bits, tohost_q.io.deq.bits)
}
io.in_slow.ready := in_slow_rdy
io.out_slow.valid := out_slow_val
io.out_slow.bits := out_slow_bits
io.clk_slow := myclock
}
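// Worked example (illustrative): set_divisor packs the hold count in bits
// 31:16 and the divisor in bits 15:0. With divisor_max = 32, writing
// 0x0003000f requests divisor = 15 (a slow-clock period of 16 fast cycles)
// and hold = 3; both values are loaded from their shadow registers at the
// next falling edge of the slow clock.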

View File

@ -0,0 +1,281 @@
package junctions
import Chisel._
import cde.Parameters
class SmiReq(val dataWidth: Int, val addrWidth: Int) extends Bundle {
val rw = Bool()
val addr = UInt(width = addrWidth)
val data = Bits(width = dataWidth)
override def cloneType =
new SmiReq(dataWidth, addrWidth).asInstanceOf[this.type]
}
/** Simple Memory Interface IO. Used to communicate with PCR and SCR
* @param dataWidth the width in bits of the data field
* @param addrWidth the width in bits of the addr field */
class SmiIO(val dataWidth: Int, val addrWidth: Int) extends Bundle {
val req = Decoupled(new SmiReq(dataWidth, addrWidth))
val resp = Decoupled(Bits(width = dataWidth)).flip
override def cloneType =
new SmiIO(dataWidth, addrWidth).asInstanceOf[this.type]
}
abstract class SmiPeripheral extends Module {
val dataWidth: Int
val addrWidth: Int
lazy val io = new SmiIO(dataWidth, addrWidth).flip
}
/** A simple sequential memory accessed through Smi */
class SmiMem(val dataWidth: Int, val memDepth: Int) extends SmiPeripheral {
// overrides the abstract addrWidth from SmiPeripheral
val addrWidth = log2Up(memDepth)
val mem = SeqMem(memDepth, Bits(width = dataWidth))
val ren = io.req.fire() && !io.req.bits.rw
val wen = io.req.fire() && io.req.bits.rw
when (wen) { mem.write(io.req.bits.addr, io.req.bits.data) }
val resp_valid = Reg(init = Bool(false))
when (io.resp.fire()) { resp_valid := Bool(false) }
when (io.req.fire()) { resp_valid := Bool(true) }
io.resp.valid := resp_valid
io.resp.bits := mem.read(io.req.bits.addr, ren)
io.req.ready := !resp_valid
}
/** Arbitrate among several Smi clients
* @param n the number of clients
* @param dataWidth Smi data width
* @param addrWidth Smi address width */
class SmiArbiter(val n: Int, val dataWidth: Int, val addrWidth: Int)
extends Module {
val io = new Bundle {
val in = Vec(n, new SmiIO(dataWidth, addrWidth)).flip
val out = new SmiIO(dataWidth, addrWidth)
}
val wait_resp = Reg(init = Bool(false))
val choice = Reg(UInt(width = log2Up(n)))
val req_arb = Module(new RRArbiter(new SmiReq(dataWidth, addrWidth), n))
req_arb.io.in <> io.in.map(_.req)
req_arb.io.out.ready := io.out.req.ready && !wait_resp
io.out.req.bits := req_arb.io.out.bits
io.out.req.valid := req_arb.io.out.valid && !wait_resp
when (io.out.req.fire()) {
choice := req_arb.io.chosen
wait_resp := Bool(true)
}
when (io.out.resp.fire()) { wait_resp := Bool(false) }
for ((resp, i) <- io.in.map(_.resp).zipWithIndex) {
resp.bits := io.out.resp.bits
resp.valid := io.out.resp.valid && choice === UInt(i)
}
io.out.resp.ready := io.in(choice).resp.ready
}
class SmiIONastiReadIOConverter(val dataWidth: Int, val addrWidth: Int)
(implicit p: Parameters) extends NastiModule()(p) {
val io = new Bundle {
val nasti = new NastiReadIO().flip
val smi = new SmiIO(dataWidth, addrWidth)
}
private val maxWordsPerBeat = nastiXDataBits / dataWidth
private val wordCountBits = log2Up(maxWordsPerBeat)
private val byteOffBits = log2Up(dataWidth / 8)
private val addrOffBits = addrWidth + byteOffBits
private def calcWordCount(size: UInt): UInt =
(UInt(1) << (size - UInt(byteOffBits))) - UInt(1)
val (s_idle :: s_read :: s_resp :: Nil) = Enum(Bits(), 3)
val state = Reg(init = s_idle)
val nWords = Reg(UInt(width = wordCountBits))
val nBeats = Reg(UInt(width = nastiXLenBits))
val addr = Reg(UInt(width = addrWidth))
val id = Reg(UInt(width = nastiRIdBits))
val byteOff = Reg(UInt(width = byteOffBits))
val recvInd = Reg(init = UInt(0, wordCountBits))
val sendDone = Reg(init = Bool(false))
val buffer = Reg(init = Vec.fill(maxWordsPerBeat) { Bits(0, dataWidth) })
io.nasti.ar.ready := (state === s_idle)
io.smi.req.valid := (state === s_read) && !sendDone
io.smi.req.bits.rw := Bool(false)
io.smi.req.bits.addr := addr
io.smi.resp.ready := (state === s_read)
io.nasti.r.valid := (state === s_resp)
io.nasti.r.bits := NastiReadDataChannel(
id = id,
data = buffer.asUInt,
last = (nBeats === UInt(0)))
when (io.nasti.ar.fire()) {
when (io.nasti.ar.bits.size < UInt(byteOffBits)) {
nWords := UInt(0)
} .otherwise {
nWords := calcWordCount(io.nasti.ar.bits.size)
}
nBeats := io.nasti.ar.bits.len
addr := io.nasti.ar.bits.addr(addrOffBits - 1, byteOffBits)
if (maxWordsPerBeat > 1)
recvInd := io.nasti.ar.bits.addr(wordCountBits + byteOffBits - 1, byteOffBits)
else
recvInd := UInt(0)
id := io.nasti.ar.bits.id
state := s_read
}
when (io.smi.req.fire()) {
addr := addr + UInt(1)
sendDone := (nWords === UInt(0))
}
when (io.smi.resp.fire()) {
recvInd := recvInd + UInt(1)
nWords := nWords - UInt(1)
buffer(recvInd) := io.smi.resp.bits
when (nWords === UInt(0)) { state := s_resp }
}
when (io.nasti.r.fire()) {
recvInd := UInt(0)
sendDone := Bool(false)
// clear all the registers in the buffer
buffer.foreach(_ := Bits(0))
nBeats := nBeats - UInt(1)
state := Mux(io.nasti.r.bits.last, s_idle, s_read)
}
}
class SmiIONastiWriteIOConverter(val dataWidth: Int, val addrWidth: Int)
(implicit p: Parameters) extends NastiModule()(p) {
val io = new Bundle {
val nasti = new NastiWriteIO().flip
val smi = new SmiIO(dataWidth, addrWidth)
}
private val dataBytes = dataWidth / 8
private val maxWordsPerBeat = nastiXDataBits / dataWidth
private val byteOffBits = log2Floor(dataBytes)
private val addrOffBits = addrWidth + byteOffBits
private val nastiByteOffBits = log2Ceil(nastiXDataBits / 8)
assert(!io.nasti.aw.valid || io.nasti.aw.bits.size >= UInt(byteOffBits),
"Nasti size must be >= Smi size")
val id = Reg(UInt(width = nastiWIdBits))
val addr = Reg(UInt(width = addrWidth))
val offset = Reg(UInt(width = nastiByteOffBits))
def makeStrobe(offset: UInt, size: UInt, strb: UInt) = {
val sizemask = (UInt(1) << (UInt(1) << size)) - UInt(1)
val bytemask = strb & (sizemask << offset)
Vec.tabulate(maxWordsPerBeat){i => bytemask(dataBytes * i)}.asUInt
}
val size = Reg(UInt(width = nastiXSizeBits))
val strb = Reg(UInt(width = maxWordsPerBeat))
val data = Reg(UInt(width = nastiXDataBits))
val last = Reg(Bool())
val s_idle :: s_data :: s_send :: s_ack :: s_resp :: Nil = Enum(Bits(), 5)
val state = Reg(init = s_idle)
io.nasti.aw.ready := (state === s_idle)
io.nasti.w.ready := (state === s_data)
io.smi.req.valid := (state === s_send) && strb(0)
io.smi.req.bits.rw := Bool(true)
io.smi.req.bits.addr := addr
io.smi.req.bits.data := data(dataWidth - 1, 0)
io.smi.resp.ready := (state === s_ack)
io.nasti.b.valid := (state === s_resp)
io.nasti.b.bits := NastiWriteResponseChannel(id)
val jump = if (maxWordsPerBeat > 1)
PriorityMux(strb(maxWordsPerBeat - 1, 1),
(1 until maxWordsPerBeat).map(UInt(_)))
else UInt(1)
when (io.nasti.aw.fire()) {
if (dataWidth == nastiXDataBits) {
addr := io.nasti.aw.bits.addr(addrOffBits - 1, byteOffBits)
} else {
addr := Cat(io.nasti.aw.bits.addr(addrOffBits - 1, nastiByteOffBits),
UInt(0, nastiByteOffBits - byteOffBits))
}
offset := io.nasti.aw.bits.addr(nastiByteOffBits - 1, 0)
id := io.nasti.aw.bits.id
size := io.nasti.aw.bits.size
last := Bool(false)
state := s_data
}
when (io.nasti.w.fire()) {
last := io.nasti.w.bits.last
strb := makeStrobe(offset, size, io.nasti.w.bits.strb)
data := io.nasti.w.bits.data
state := s_send
}
when (state === s_send) {
when (io.smi.req.ready || !strb(0)) {
strb := strb >> jump
data := data >> Cat(jump, UInt(0, log2Up(dataWidth)))
addr := addr + jump
when (strb(0)) { state := s_ack }
}
}
when (io.smi.resp.fire()) {
state := Mux(strb === UInt(0),
Mux(last, s_resp, s_data), s_send)
}
when (io.nasti.b.fire()) { state := s_idle }
}
/** Convert Nasti protocol to Smi protocol */
class SmiIONastiIOConverter(val dataWidth: Int, val addrWidth: Int)
(implicit p: Parameters) extends NastiModule()(p) {
val io = new Bundle {
val nasti = (new NastiIO).flip
val smi = new SmiIO(dataWidth, addrWidth)
}
require(isPow2(dataWidth), "SMI data width must be power of 2")
require(dataWidth <= nastiXDataBits,
"SMI data width must be less than or equal to NASTI data width")
val reader = Module(new SmiIONastiReadIOConverter(dataWidth, addrWidth))
reader.io.nasti <> io.nasti
val writer = Module(new SmiIONastiWriteIOConverter(dataWidth, addrWidth))
writer.io.nasti <> io.nasti
val arb = Module(new SmiArbiter(2, dataWidth, addrWidth))
arb.io.in(0) <> reader.io.smi
arb.io.in(1) <> writer.io.smi
io.smi <> arb.io.out
}
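// Illustrative only: a minimal sketch of putting a 64-entry SmiMem behind a
// NASTI port via the converter, assuming a 64-bit NASTI data width; the
// module and port names are hypothetical.
class SmiMemExample(implicit p: Parameters) extends NastiModule {
  val io = new Bundle { val nasti = (new NastiIO).flip }
  val mem = Module(new SmiMem(dataWidth = 64, memDepth = 64))
  val conv = Module(new SmiIONastiIOConverter(64, log2Up(64)))
  conv.io.nasti <> io.nasti
  mem.io <> conv.io.smi
}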

View File

@ -0,0 +1,187 @@
package junctions
import Chisel._
import NastiConstants._
import cde.Parameters
class StreamChannel(w: Int) extends Bundle {
val data = UInt(width = w)
val last = Bool()
override def cloneType = new StreamChannel(w).asInstanceOf[this.type]
}
class StreamIO(w: Int) extends Bundle {
val out = Decoupled(new StreamChannel(w))
val in = Decoupled(new StreamChannel(w)).flip
override def cloneType = new StreamIO(w).asInstanceOf[this.type]
}
class NastiIOStreamIOConverter(w: Int)(implicit p: Parameters) extends Module {
val io = new Bundle {
val nasti = (new NastiIO).flip
val stream = new StreamIO(w)
}
val streamSize = UInt(log2Up(w / 8))
assert(!io.nasti.ar.valid || io.nasti.ar.bits.size === streamSize,
"read channel wrong size on stream")
assert(!io.nasti.ar.valid || io.nasti.ar.bits.len === UInt(0) ||
io.nasti.ar.bits.burst === BURST_FIXED,
"read channel wrong burst type on stream")
assert(!io.nasti.aw.valid || io.nasti.aw.bits.size === streamSize,
"write channel wrong size on stream")
assert(!io.nasti.aw.valid || io.nasti.aw.bits.len === UInt(0) ||
io.nasti.aw.bits.burst === BURST_FIXED,
"write channel wrong burst type on stream")
assert(!io.nasti.w.valid || io.nasti.w.bits.strb.andR,
"write channel cannot take partial writes")
val read_id = Reg(io.nasti.ar.bits.id)
val read_cnt = Reg(io.nasti.ar.bits.len)
val reading = Reg(init = Bool(false))
io.nasti.ar.ready := !reading
io.nasti.r.valid := reading && io.stream.in.valid
io.nasti.r.bits := io.stream.in.bits
io.nasti.r.bits.resp := UInt(0)
io.nasti.r.bits.id := read_id
io.stream.in.ready := reading && io.nasti.r.ready
when (io.nasti.ar.fire()) {
read_id := io.nasti.ar.bits.id
read_cnt := io.nasti.ar.bits.len
reading := Bool(true)
}
when (io.nasti.r.fire()) {
when (read_cnt === UInt(0)) {
reading := Bool(false)
} .otherwise {
read_cnt := read_cnt - UInt(1)
}
}
val write_id = Reg(io.nasti.aw.bits.id)
val writing = Reg(init = Bool(false))
val write_resp = Reg(init = Bool(false))
io.nasti.aw.ready := !writing && !write_resp
io.nasti.w.ready := writing && io.stream.out.ready
io.stream.out.valid := writing && io.nasti.w.valid
io.stream.out.bits := io.nasti.w.bits
io.nasti.b.valid := write_resp
io.nasti.b.bits.resp := UInt(0)
io.nasti.b.bits.id := write_id
when (io.nasti.aw.fire()) {
write_id := io.nasti.aw.bits.id
writing := Bool(true)
}
when (io.nasti.w.fire() && io.nasti.w.bits.last) {
writing := Bool(false)
write_resp := Bool(true)
}
when (io.nasti.b.fire()) { write_resp := Bool(false) }
}
class StreamNarrower(win: Int, wout: Int) extends Module {
require(win > wout, "Stream narrower input width must be larger than output width")
require(win % wout == 0, "Stream narrower input width must be a multiple of output width")
val io = new Bundle {
val in = Decoupled(new StreamChannel(win)).flip
val out = Decoupled(new StreamChannel(wout))
}
val n_pieces = win / wout
val buffer = Reg(Bits(width = win))
val (piece_idx, pkt_done) = Counter(io.out.fire(), n_pieces)
val pieces = Vec.tabulate(n_pieces) { i => buffer(wout * (i + 1) - 1, wout * i) }
val last_piece = (piece_idx === UInt(n_pieces - 1))
val sending = Reg(init = Bool(false))
val in_last = Reg(Bool())
when (io.in.fire()) {
buffer := io.in.bits.data
in_last := io.in.bits.last
sending := Bool(true)
}
when (pkt_done) { sending := Bool(false) }
io.out.valid := sending
io.out.bits.data := pieces(piece_idx)
io.out.bits.last := in_last && last_piece
io.in.ready := !sending
}
class StreamExpander(win: Int, wout: Int) extends Module {
require(win < wout, "Stream expander input width must be smaller than output width")
require(wout % win == 0, "Stream expander output width must be a multiple of input width")
val io = new Bundle {
val in = Decoupled(new StreamChannel(win)).flip
val out = Decoupled(new StreamChannel(wout))
}
val n_pieces = wout / win
val buffer = Reg(Vec(n_pieces, UInt(width = win)))
val last = Reg(Bool())
val collecting = Reg(init = Bool(true))
val (piece_idx, pkt_done) = Counter(io.in.fire(), n_pieces)
when (io.in.fire()) { buffer(piece_idx) := io.in.bits.data }
when (pkt_done) { last := io.in.bits.last; collecting := Bool(false) }
when (io.out.fire()) { collecting := Bool(true) }
io.in.ready := collecting
io.out.valid := !collecting
io.out.bits.data := buffer.asUInt
io.out.bits.last := last
}
object StreamUtils {
def connectStreams(a: StreamIO, b: StreamIO) {
a.in <> b.out
b.in <> a.out
}
}
trait Serializable {
def nbits: Int
}
class Serializer[T <: Data with Serializable](w: Int, typ: T) extends Module {
val io = new Bundle {
val in = Decoupled(typ).flip
val out = Decoupled(Bits(width = w))
}
val narrower = Module(new StreamNarrower(typ.nbits, w))
narrower.io.in.bits.data := io.in.bits.asUInt
narrower.io.in.bits.last := Bool(true)
narrower.io.in.valid := io.in.valid
io.in.ready := narrower.io.in.ready
io.out.valid := narrower.io.out.valid
io.out.bits := narrower.io.out.bits.data
narrower.io.out.ready := io.out.ready
}
class Deserializer[T <: Data with Serializable](w: Int, typ: T) extends Module {
val io = new Bundle {
val in = Decoupled(Bits(width = w)).flip
val out = Decoupled(typ)
}
val expander = Module(new StreamExpander(w, typ.nbits))
expander.io.in.valid := io.in.valid
expander.io.in.bits.data := io.in.bits
expander.io.in.bits.last := Bool(true)
io.in.ready := expander.io.in.ready
io.out.valid := expander.io.out.valid
io.out.bits := typ.cloneType.fromBits(expander.io.out.bits.data)
expander.io.out.ready := io.out.ready
}
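// Illustrative only: a minimal sketch of a user-defined bundle that can pass
// through Serializer/Deserializer; the field names and widths are
// hypothetical. nbits must report the packed width of the bundle.
class ExamplePacket extends Bundle with Serializable {
  val addr = UInt(width = 16)
  val data = UInt(width = 32)
  def nbits = 48
  override def cloneType = (new ExamplePacket).asInstanceOf[this.type]
}
// A 48-bit packet streamed over an 8-bit link:
//   val ser = Module(new Serializer(8, new ExamplePacket))
//   val des = Module(new Deserializer(8, new ExamplePacket))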

View File

@ -0,0 +1,163 @@
package junctions.unittests
import Chisel._
import junctions._
import junctions.NastiConstants._
import cde.Parameters
class NastiDriver(dataWidth: Int, burstLen: Int, nBursts: Int)
(implicit p: Parameters) extends NastiModule {
val io = new Bundle {
val nasti = new NastiIO
val finished = Bool(OUTPUT)
val start = Bool(INPUT)
}
val dataBytes = dataWidth / 8
val nastiDataBytes = nastiXDataBits / 8
val (write_cnt, write_done) = Counter(io.nasti.w.fire(), burstLen)
val (read_cnt, read_done) = Counter(io.nasti.r.fire(), burstLen)
val (req_cnt, reqs_done) = Counter(read_done, nBursts)
val req_addr = Cat(req_cnt, UInt(0, log2Up(burstLen * dataBytes)))
val write_data = UInt(0x10000000L, dataWidth) | Cat(req_cnt, write_cnt)
val expected_data = UInt(0x10000000L, dataWidth) | Cat(req_cnt, read_cnt)
val (s_idle :: s_write_addr :: s_write_data :: s_write_stall :: s_write_resp ::
s_read_addr :: s_read_data :: s_read_stall :: s_done :: Nil) = Enum(Bits(), 9)
val state = Reg(init = s_idle)
val (stall_cnt, stall_done) = Counter(state === s_read_stall, 2)
io.nasti.aw.valid := (state === s_write_addr)
io.nasti.aw.bits := NastiWriteAddressChannel(
id = UInt(0),
addr = req_addr,
size = UInt(log2Up(dataBytes)),
len = UInt(burstLen - 1))
io.nasti.w.valid := (state === s_write_data)
io.nasti.w.bits := NastiWriteDataChannel(
data = Cat(write_data, write_data),
last = (write_cnt === UInt(burstLen - 1)))
io.nasti.b.ready := (state === s_write_resp)
io.nasti.ar.valid := (state === s_read_addr)
io.nasti.ar.bits := NastiReadAddressChannel(
id = UInt(0),
addr = req_addr,
size = UInt(log2Up(dataBytes)),
len = UInt(burstLen - 1))
io.nasti.r.ready := (state === s_read_data)
io.finished := (state === s_done)
when (state === s_idle && io.start) { state := s_write_addr }
when (io.nasti.aw.fire()) { state := s_write_data }
when (io.nasti.w.fire()) { state := s_write_stall }
when (state === s_write_stall) { state := s_write_data }
when (write_done) { state := s_write_resp }
when (io.nasti.b.fire()) { state := s_read_addr }
when (io.nasti.ar.fire()) { state := s_read_data }
when (io.nasti.r.fire()) { state := s_read_stall }
when (stall_done) { state := s_read_data }
when (read_done) { state := s_write_addr }
when (reqs_done) { state := s_done }
val full_addr = req_addr + (read_cnt << UInt(log2Up(dataBytes)))
val byteshift = full_addr(log2Up(nastiDataBytes) - 1, 0)
val bitshift = Cat(byteshift, UInt(0, 3))
val read_data = (io.nasti.r.bits.data >> bitshift) & Fill(dataWidth, UInt(1, 1))
assert(!io.nasti.r.valid || read_data === expected_data,
s"NastiDriver got wrong data")
}
class AtosConverterTestBackend(implicit p: Parameters) extends NastiModule()(p) {
val io = new Bundle {
val nasti = (new NastiIO).flip
val finished = Bool(OUTPUT)
}
val (s_waddr :: s_wdata :: s_wresp ::
s_raddr :: s_rresp :: s_done :: Nil) = Enum(Bits(), 6)
val state = Reg(init = s_waddr)
val n_words = 4
val test_data = Reg(Vec(n_words, UInt(width = nastiXDataBits)))
val req_id = Reg(UInt(width = nastiXIdBits))
val (w_count, w_last) = Counter(io.nasti.w.fire(), n_words)
val (r_count, r_last) = Counter(io.nasti.r.fire(), n_words)
when (io.nasti.aw.fire()) {
req_id := io.nasti.aw.bits.id
state := s_wdata
}
when (io.nasti.w.fire()) {
test_data(w_count) := io.nasti.w.bits.data
when (io.nasti.w.bits.last) { state := s_wresp }
}
when (io.nasti.b.fire()) { state := s_raddr }
when (io.nasti.ar.fire()) {
req_id := io.nasti.ar.bits.id
state := s_rresp
}
when (io.nasti.r.fire() && io.nasti.r.bits.last) { state := s_done }
io.nasti.aw.ready := (state === s_waddr)
io.nasti.w.ready := (state === s_wdata)
io.nasti.ar.ready := (state === s_raddr)
io.nasti.b.valid := (state === s_wresp)
io.nasti.b.bits := NastiWriteResponseChannel(id = req_id)
io.nasti.r.valid := (state === s_rresp)
io.nasti.r.bits := NastiReadDataChannel(
id = req_id,
data = test_data(r_count),
last = r_last)
io.finished := (state === s_done)
}
class AtosConverterTest(implicit val p: Parameters) extends UnitTest
with HasNastiParameters {
val frontend = Module(new NastiDriver(nastiXDataBits, 4, 1))
val backend = Module(new AtosConverterTestBackend)
val serdes = Module(new AtosSerdes(8))
val desser = Module(new AtosDesser(8))
val client_conv = Module(new AtosClientConverter)
val manager_conv = Module(new AtosManagerConverter)
client_conv.io.nasti <> frontend.io.nasti
serdes.io.wide <> client_conv.io.atos
desser.io.narrow <> serdes.io.narrow
manager_conv.io.atos <> desser.io.wide
backend.io.nasti <> manager_conv.io.nasti
frontend.io.start := io.start
io.finished := frontend.io.finished && backend.io.finished
}
class HastiTest(implicit p: Parameters) extends UnitTest {
val sram = Module(new HastiTestSRAM(8))
val bus = Module(new HastiBus(Seq(a => Bool(true))))
val conv = Module(new HastiMasterIONastiIOConverter)
val driver = Module(new NastiDriver(32, 8, 2))
bus.io.slaves(0) <> sram.io
bus.io.master <> conv.io.hasti
conv.io.nasti <> driver.io.nasti
io.finished := driver.io.finished
driver.io.start := io.start
}

View File

@ -0,0 +1,85 @@
package junctions.unittests
import Chisel._
import junctions._
import junctions.NastiConstants._
class MultiWidthFifoTest extends UnitTest {
val big2little = Module(new MultiWidthFifo(16, 8, 8))
val little2big = Module(new MultiWidthFifo(8, 16, 4))
val bl_send = Reg(init = Bool(false))
val lb_send = Reg(init = Bool(false))
val bl_recv = Reg(init = Bool(false))
val lb_recv = Reg(init = Bool(false))
val bl_finished = Reg(init = Bool(false))
val lb_finished = Reg(init = Bool(false))
val bl_data = Vec.tabulate(4){i => UInt((2 * i + 1) * 256 + 2 * i, 16)}
val lb_data = Vec.tabulate(8){i => UInt(i, 8)}
val (bl_send_cnt, bl_send_done) = Counter(big2little.io.in.fire(), 4)
val (lb_send_cnt, lb_send_done) = Counter(little2big.io.in.fire(), 8)
val (bl_recv_cnt, bl_recv_done) = Counter(big2little.io.out.fire(), 8)
val (lb_recv_cnt, lb_recv_done) = Counter(little2big.io.out.fire(), 4)
big2little.io.in.valid := bl_send
big2little.io.in.bits := bl_data(bl_send_cnt)
big2little.io.out.ready := bl_recv
little2big.io.in.valid := lb_send
little2big.io.in.bits := lb_data(lb_send_cnt)
little2big.io.out.ready := lb_recv
val bl_recv_data_idx = bl_recv_cnt >> UInt(1)
val bl_recv_data = Mux(bl_recv_cnt(0),
bl_data(bl_recv_data_idx)(15, 8),
bl_data(bl_recv_data_idx)(7, 0))
val lb_recv_data = Cat(
lb_data(Cat(lb_recv_cnt, UInt(1, 1))),
lb_data(Cat(lb_recv_cnt, UInt(0, 1))))
when (io.start) {
bl_send := Bool(true)
lb_send := Bool(true)
}
when (bl_send_done) {
bl_send := Bool(false)
bl_recv := Bool(true)
}
when (lb_send_done) {
lb_send := Bool(false)
lb_recv := Bool(true)
}
when (bl_recv_done) {
bl_recv := Bool(false)
bl_finished := Bool(true)
}
when (lb_recv_done) {
lb_recv := Bool(false)
lb_finished := Bool(true)
}
io.finished := bl_finished && lb_finished
val bl_start_recv = Reg(next = bl_send_done)
val lb_start_recv = Reg(next = lb_send_done)
assert(!little2big.io.out.valid || little2big.io.out.bits === lb_recv_data,
"Little to Big data mismatch")
assert(!big2little.io.out.valid || big2little.io.out.bits === bl_recv_data,
"Bit to Little data mismatch")
assert(!lb_start_recv || little2big.io.count === UInt(4),
"Little to Big count incorrect")
assert(!bl_start_recv || big2little.io.count === UInt(8),
"Big to Little count incorrect")
}

View File

@ -0,0 +1,111 @@
package junctions.unittests
import Chisel._
import junctions._
import junctions.NastiConstants._
import cde.Parameters
class NastiDemuxDriver(n: Int)(implicit p: Parameters) extends Module {
val io = new Bundle {
val start = Bool(INPUT)
val finished = Bool(OUTPUT)
val nasti = new NastiIO
val select = UInt(OUTPUT, log2Up(n))
}
val (s_idle :: s_write_addr :: s_write_data :: s_write_resp ::
s_read_addr :: s_read_resp :: s_done :: Nil) = Enum(Bits(), 7)
val state = Reg(init = s_idle)
val select = Reg(init = UInt(0, log2Up(n)))
when (state === s_idle && io.start) { state := s_write_addr }
when (io.nasti.aw.fire()) { state := s_write_data }
when (io.nasti.w.fire()) { state := s_write_resp }
when (io.nasti.b.fire()) { state := s_read_addr }
when (io.nasti.ar.fire()) { state := s_read_resp }
when (io.nasti.r.fire()) {
when (select === UInt(n - 1)) {
state := s_done
} .otherwise {
select := select + UInt(1)
state := s_write_addr
}
}
io.nasti.aw.valid := (state === s_write_addr)
io.nasti.aw.bits := NastiWriteAddressChannel(
id = UInt(0),
addr = UInt(0),
size = UInt("b011"))
io.nasti.w.valid := (state === s_write_data)
io.nasti.w.bits := NastiWriteDataChannel(data = select)
io.nasti.b.ready := (state === s_write_resp)
io.nasti.ar.valid := (state === s_read_addr)
io.nasti.ar.bits := NastiReadAddressChannel(
id = UInt(0),
addr = UInt(0),
size = UInt("b011"))
io.nasti.r.ready := (state === s_read_resp)
io.finished := (state === s_done)
io.select := select
assert(!io.nasti.r.valid || io.nasti.r.bits.data === select,
"NASTI DeMux test: Read data did not match")
}
class NastiDemuxSlave(implicit p: Parameters) extends NastiModule()(p) {
val io = (new NastiIO).flip
val (s_write_wait :: s_write_data :: s_write_resp ::
s_read_wait :: s_read_resp :: s_done :: Nil) = Enum(Bits(), 6)
val state = Reg(init = s_write_wait)
val value = Reg(UInt(width = 64))
val id = Reg(UInt(width = nastiXIdBits))
when (io.aw.fire()) {
id := io.aw.bits.id
state := s_write_data
}
when (io.w.fire()) {
value := io.w.bits.data
state := s_write_resp
}
when (io.b.fire()) { state := s_read_wait }
when (io.ar.fire()) {
id := io.ar.bits.id
state := s_read_resp
}
when (io.r.fire()) { state := s_done }
io.aw.ready := (state === s_write_wait)
io.w.ready := (state === s_write_data)
io.b.valid := (state === s_write_resp)
io.b.bits := NastiWriteResponseChannel(id = id)
io.ar.ready := (state === s_read_wait)
io.r.valid := (state === s_read_resp)
io.r.bits := NastiReadDataChannel(id = id, data = value)
}
class NastiMemoryDemuxTest(implicit p: Parameters) extends UnitTest {
val nSlaves = 4
val driver = Module(new NastiDemuxDriver(nSlaves))
driver.io.start := io.start
io.finished := driver.io.finished
val demux = Module(new NastiMemoryDemux(nSlaves))
demux.io.master <> driver.io.nasti
demux.io.select := driver.io.select
for (i <- 0 until nSlaves) {
val slave = Module(new NastiDemuxSlave)
slave.io <> demux.io.slaves(i)
}
}

View File

@ -0,0 +1,65 @@
package junctions.unittests
import Chisel._
import junctions._
import cde.{Field, Parameters}
abstract class UnitTest extends Module {
val io = new Bundle {
val finished = Bool(OUTPUT)
val start = Bool(INPUT)
}
when (io.start) {
printf(s"Started UnitTest ${this.getClass.getSimpleName}\n")
}
}
case object UnitTests extends Field[Parameters => Seq[UnitTest]]
class UnitTestSuite(implicit p: Parameters) extends Module {
val io = new Bundle {
val finished = Bool(OUTPUT)
}
val tests = p(UnitTests)(p)
val s_idle :: s_start :: s_wait :: s_done :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_idle)
val test_idx = Reg(init = UInt(0, log2Up(tests.size)))
val test_finished = Vec(tests.map(_.io.finished))
when (state === s_idle) { state := s_start }
when (state === s_start) { state := s_wait }
when (state === s_wait && test_finished(test_idx)) {
state := s_start
test_idx := test_idx + UInt(1)
state := Mux(test_idx === UInt(tests.size - 1), s_done, s_start)
}
val timer = Module(new Timer(1000, tests.size))
timer.io.start.valid := Bool(false)
timer.io.stop.valid := Bool(false)
tests.zipWithIndex.foreach { case (mod, i) =>
mod.io.start := (state === s_start) && test_idx === UInt(i)
when (test_idx === UInt(i)) {
timer.io.start.valid := mod.io.start
timer.io.start.bits := UInt(i)
timer.io.stop.valid := mod.io.finished
timer.io.stop.bits := UInt(i)
}
}
io.finished := (state === s_done)
assert(!timer.io.timeout.valid, "UnitTest timed out")
}
object JunctionsUnitTests {
def apply(implicit p: Parameters): Seq[UnitTest] =
Seq(
Module(new MultiWidthFifoTest),
Module(new AtosConverterTest),
Module(new NastiMemoryDemuxTest),
Module(new HastiTest))
}

View File

@ -0,0 +1,365 @@
// See LICENSE for license details.
package junctions
import Chisel._
import cde.Parameters
class ParameterizedBundle(implicit p: Parameters) extends Bundle {
override def cloneType = {
try {
this.getClass.getConstructors.head.newInstance(p).asInstanceOf[this.type]
} catch {
case e: java.lang.IllegalArgumentException =>
throwException("Unable to use ParamaterizedBundle.cloneType on " +
this.getClass + ", probably because " + this.getClass +
"() takes more than one argument. Consider overriding " +
"cloneType() on " + this.getClass, e)
}
}
}
class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Module {
val io = new QueueIO(data, entries)
require(entries > 1)
val do_flow = Wire(Bool())
val do_enq = io.enq.fire() && !do_flow
val do_deq = io.deq.fire() && !do_flow
val maybe_full = Reg(init=Bool(false))
val enq_ptr = Counter(do_enq, entries)._1
val (deq_ptr, deq_done) = Counter(do_deq, entries)
when (do_enq =/= do_deq) { maybe_full := do_enq }
val ptr_match = enq_ptr === deq_ptr
val empty = ptr_match && !maybe_full
val full = ptr_match && maybe_full
val atLeastTwo = full || enq_ptr - deq_ptr >= UInt(2)
do_flow := empty && io.deq.ready
val ram = SeqMem(entries, data)
when (do_enq) { ram.write(enq_ptr, io.enq.bits) }
val ren = io.deq.ready && (atLeastTwo || !io.deq.valid && !empty)
val raddr = Mux(io.deq.valid, Mux(deq_done, UInt(0), deq_ptr + UInt(1)), deq_ptr)
val ram_out_valid = Reg(next = ren)
io.deq.valid := Mux(empty, io.enq.valid, ram_out_valid)
io.enq.ready := !full
io.deq.bits := Mux(empty, io.enq.bits, ram.read(raddr, ren))
}
class HellaQueue[T <: Data](val entries: Int)(data: => T) extends Module {
val io = new QueueIO(data, entries)
val fq = Module(new HellaFlowQueue(entries)(data))
fq.io.enq <> io.enq
io.deq <> Queue(fq.io.deq, 1, pipe = true)
}
object HellaQueue {
def apply[T <: Data](enq: DecoupledIO[T], entries: Int) = {
val q = Module((new HellaQueue(entries)) { enq.bits })
q.io.enq.valid := enq.valid // not using <> so that override is allowed
q.io.enq.bits := enq.bits
enq.ready := q.io.enq.ready
q.io.deq
}
}
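// Illustrative only: HellaQueue.apply wraps a producer in an entries-deep
// flow-through queue and hands back the dequeue side; names are hypothetical.
class BufferedPipeExample extends Module {
  val io = new Bundle {
    val in = Decoupled(UInt(width = 32)).flip
    val out = Decoupled(UInt(width = 32))
  }
  io.out <> HellaQueue(io.in, 16)
}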
/** A generalized locking RR arbiter that addresses the limitations of the
* version in the Chisel standard library */
abstract class JunctionsAbstractLockingArbiter[T <: Data](typ: T, arbN: Int)
extends Module {
val io = new Bundle {
val in = Vec(arbN, Decoupled(typ.cloneType)).flip
val out = Decoupled(typ.cloneType)
}
def rotateLeft[T <: Data](norm: Vec[T], rot: UInt): Vec[T] = {
val n = norm.size
Vec.tabulate(n) { i =>
Mux(rot < UInt(n - i), norm(UInt(i) + rot), norm(rot - UInt(n - i)))
}
}
val lockIdx = Reg(init = UInt(0, log2Up(arbN)))
val locked = Reg(init = Bool(false))
val choice = PriorityMux(
rotateLeft(Vec(io.in.map(_.valid)), lockIdx + UInt(1)),
rotateLeft(Vec((0 until arbN).map(UInt(_))), lockIdx + UInt(1)))
val chosen = Mux(locked, lockIdx, choice)
for (i <- 0 until arbN) {
io.in(i).ready := io.out.ready && chosen === UInt(i)
}
io.out.valid := io.in(chosen).valid
io.out.bits := io.in(chosen).bits
}
/** This locking arbiter determines when it is safe to unlock
* by peeking at the data */
class JunctionsPeekingArbiter[T <: Data](
typ: T, arbN: Int,
canUnlock: T => Bool,
needsLock: Option[T => Bool] = None)
extends JunctionsAbstractLockingArbiter(typ, arbN) {
def realNeedsLock(data: T): Bool =
needsLock.map(_(data)).getOrElse(Bool(true))
when (io.out.fire()) {
when (!locked && realNeedsLock(io.out.bits)) {
lockIdx := choice
locked := Bool(true)
}
// the unlock statement takes precedence
when (canUnlock(io.out.bits)) {
locked := Bool(false)
}
}
}
/** This arbiter determines when it is safe to unlock by counting transactions */
class JunctionsCountingArbiter[T <: Data](
typ: T, arbN: Int, count: Int,
val needsLock: Option[T => Bool] = None)
extends JunctionsAbstractLockingArbiter(typ, arbN) {
def realNeedsLock(data: T): Bool =
needsLock.map(_(data)).getOrElse(Bool(true))
// if count is 1, you should use a non-locking arbiter
require(count > 1, "CountingArbiter cannot have count <= 1")
val lock_ctr = Counter(count)
when (io.out.fire()) {
when (!locked && realNeedsLock(io.out.bits)) {
lockIdx := choice
locked := Bool(true)
lock_ctr.inc()
}
when (locked) {
when (lock_ctr.inc()) { locked := Bool(false) }
}
}
}
class ReorderQueueWrite[T <: Data](dType: T, tagWidth: Int) extends Bundle {
val data = dType.cloneType
val tag = UInt(width = tagWidth)
override def cloneType =
new ReorderQueueWrite(dType, tagWidth).asInstanceOf[this.type]
}
class ReorderEnqueueIO[T <: Data](dType: T, tagWidth: Int)
extends DecoupledIO(new ReorderQueueWrite(dType, tagWidth)) {
override def cloneType =
new ReorderEnqueueIO(dType, tagWidth).asInstanceOf[this.type]
}
class ReorderDequeueIO[T <: Data](dType: T, tagWidth: Int) extends Bundle {
val valid = Bool(INPUT)
val tag = UInt(INPUT, tagWidth)
val data = dType.cloneType.asOutput
val matches = Bool(OUTPUT)
override def cloneType =
new ReorderDequeueIO(dType, tagWidth).asInstanceOf[this.type]
}
class ReorderQueue[T <: Data](dType: T, tagWidth: Int, size: Option[Int] = None)
extends Module {
val io = new Bundle {
val enq = new ReorderEnqueueIO(dType, tagWidth).flip
val deq = new ReorderDequeueIO(dType, tagWidth)
}
val tagSpaceSize = 1 << tagWidth
val actualSize = size.getOrElse(tagSpaceSize)
if (tagSpaceSize > actualSize) {
val roq_data = Reg(Vec(actualSize, dType))
val roq_tags = Reg(Vec(actualSize, UInt(width = tagWidth)))
val roq_free = Reg(init = Vec.fill(actualSize)(Bool(true)))
val roq_enq_addr = PriorityEncoder(roq_free)
val roq_matches = roq_tags.zip(roq_free)
.map { case (tag, free) => tag === io.deq.tag && !free }
val roq_deq_onehot = PriorityEncoderOH(roq_matches)
io.enq.ready := roq_free.reduce(_ || _)
io.deq.data := Mux1H(roq_deq_onehot, roq_data)
io.deq.matches := roq_matches.reduce(_ || _)
when (io.enq.valid && io.enq.ready) {
roq_data(roq_enq_addr) := io.enq.bits.data
roq_tags(roq_enq_addr) := io.enq.bits.tag
roq_free(roq_enq_addr) := Bool(false)
}
when (io.deq.valid) {
roq_free(OHToUInt(roq_deq_onehot)) := Bool(true)
}
println(s"Warning - using a CAM for ReorderQueue, tagBits: ${tagWidth} size: ${actualSize}")
} else {
val roq_data = Mem(tagSpaceSize, dType)
val roq_free = Reg(init = Vec.fill(tagSpaceSize)(Bool(true)))
io.enq.ready := roq_free(io.enq.bits.tag)
io.deq.data := roq_data(io.deq.tag)
io.deq.matches := !roq_free(io.deq.tag)
when (io.enq.valid && io.enq.ready) {
roq_data(io.enq.bits.tag) := io.enq.bits.data
roq_free(io.enq.bits.tag) := Bool(false)
}
when (io.deq.valid) {
roq_free(io.deq.tag) := Bool(true)
}
}
}
object DecoupledHelper {
def apply(rvs: Bool*) = new DecoupledHelper(rvs)
}
class DecoupledHelper(val rvs: Seq[Bool]) {
def fire(exclude: Bool, includes: Bool*) = {
(rvs.filter(_ ne exclude) ++ includes).reduce(_ && _)
}
}
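// Illustrative only: a minimal sketch of the DecoupledHelper rendezvous
// idiom, fanning one input out to two outputs; names are hypothetical. Each
// endpoint fires only when all the others are ready/valid, and excluding an
// endpoint's own signal from its fire() avoids a combinational loop.
class FanOutExample extends Module {
  val io = new Bundle {
    val in = Decoupled(UInt(width = 8)).flip
    val outA = Decoupled(UInt(width = 8))
    val outB = Decoupled(UInt(width = 8))
  }
  val helper = DecoupledHelper(io.in.valid, io.outA.ready, io.outB.ready)
  io.in.ready := helper.fire(io.in.valid)
  io.outA.valid := helper.fire(io.outA.ready)
  io.outB.valid := helper.fire(io.outB.ready)
  io.outA.bits := io.in.bits
  io.outB.bits := io.in.bits
}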
class MultiWidthFifo(inW: Int, outW: Int, n: Int) extends Module {
val io = new Bundle {
val in = Decoupled(Bits(width = inW)).flip
val out = Decoupled(Bits(width = outW))
val count = UInt(OUTPUT, log2Up(n + 1))
}
if (inW == outW) {
val q = Module(new Queue(Bits(width = inW), n))
q.io.enq <> io.in
io.out <> q.io.deq
io.count := q.io.count
} else if (inW > outW) {
val nBeats = inW / outW
require(inW % outW == 0, s"MultiWidthFifo: in: $inW not divisible by out: $outW")
require(n % nBeats == 0, s"Cannot store $n output words when output beats is $nBeats")
val wdata = Reg(Vec(n / nBeats, Bits(width = inW)))
val rdata = Vec(wdata.flatMap { indat =>
(0 until nBeats).map(i => indat(outW * (i + 1) - 1, outW * i)) })
val head = Reg(init = UInt(0, log2Up(n / nBeats)))
val tail = Reg(init = UInt(0, log2Up(n)))
val size = Reg(init = UInt(0, log2Up(n + 1)))
when (io.in.fire()) {
wdata(head) := io.in.bits
head := head + UInt(1)
}
when (io.out.fire()) { tail := tail + UInt(1) }
size := MuxCase(size, Seq(
(io.in.fire() && io.out.fire()) -> (size + UInt(nBeats - 1)),
io.in.fire() -> (size + UInt(nBeats)),
io.out.fire() -> (size - UInt(1))))
io.out.valid := size > UInt(0)
io.out.bits := rdata(tail)
io.in.ready := size < UInt(n)
io.count := size
} else {
val nBeats = outW / inW
require(outW % inW == 0, s"MultiWidthFifo: out: $outW not divisible by in: $inW")
val wdata = Reg(Vec(n * nBeats, Bits(width = inW)))
val rdata = Vec.tabulate(n) { i =>
Cat(wdata.slice(i * nBeats, (i + 1) * nBeats).reverse)}
val head = Reg(init = UInt(0, log2Up(n * nBeats)))
val tail = Reg(init = UInt(0, log2Up(n)))
val size = Reg(init = UInt(0, log2Up(n * nBeats + 1)))
when (io.in.fire()) {
wdata(head) := io.in.bits
head := head + UInt(1)
}
when (io.out.fire()) { tail := tail + UInt(1) }
size := MuxCase(size, Seq(
(io.in.fire() && io.out.fire()) -> (size - UInt(nBeats - 1)),
io.in.fire() -> (size + UInt(1)),
io.out.fire() -> (size - UInt(nBeats))))
io.count := size >> UInt(log2Up(nBeats))
io.out.valid := io.count > UInt(0)
io.out.bits := rdata(tail)
io.in.ready := size < UInt(n * nBeats)
}
}
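// Illustrative usage (not in the original source): narrow 64-bit beats to
// 16-bit beats; n = 8 output words buffers two full input words.
//   val fifo = Module(new MultiWidthFifo(inW = 64, outW = 16, n = 8))
//   fifo.io.in <> wideSource    // hypothetical Decoupled(Bits(width = 64))
//   narrowSink <> fifo.io.out   // hypothetical Decoupled(Bits(width = 16))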
// ============
// Static timer
// ============
// Timer with a statically-specified period.
// Can track multiple in-flight start/stop events, distinguished by ID.
// Continues to count down as long as at least one event is in flight.
class Timer(initCount: Int, maxInflight: Int) extends Module {
val io = new Bundle {
val start = Valid(UInt(width = log2Up(maxInflight))).flip
val stop = Valid(UInt(width = log2Up(maxInflight))).flip
val timeout = Valid(UInt(width = log2Up(maxInflight)))
}
val inflight = Reg(init = Vec.fill(maxInflight) { Bool(false) })
val countdown = Reg(UInt(width = log2Up(initCount)))
val active = inflight.reduce(_ || _)
when (active) {
countdown := countdown - UInt(1)
}
when (io.start.valid) {
inflight(io.start.bits) := Bool(true)
countdown := UInt(initCount - 1)
}
when (io.stop.valid) {
inflight(io.stop.bits) := Bool(false)
}
io.timeout.valid := countdown === UInt(0) && active
io.timeout.bits := PriorityEncoder(inflight)
assert(!io.stop.valid || inflight(io.stop.bits),
"Timer stop for transaction that's not inflight")
}
object Timer {
def apply(initCount: Int, start: Bool, stop: Bool): Bool = {
val timer = Module(new Timer(initCount, 1))
timer.io.start.valid := start
timer.io.start.bits := UInt(0)
timer.io.stop.valid := stop
timer.io.stop.bits := UInt(0)
timer.io.timeout.valid
}
}
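// Illustrative usage (not in the original source): detect a response that
// has not arrived within 1000 cycles of its request. `req` and `resp` are
// hypothetical decoupled channels.
//   val timed_out = Timer(1000, req.fire(), resp.fire())
//   assert(!timed_out, "no response within 1000 cycles")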


@@ -0,0 +1,113 @@
// See LICENSE for license details.
package rocket
import Chisel._
import cde.{Parameters, Field}
import junctions.{ParameterizedBundle, DecoupledHelper}
class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
{
val io = new Bundle {
val requestor = Vec(n, new HellaCacheIO).flip
val mem = new HellaCacheIO
}
if (n == 1) {
io.mem <> io.requestor.head
} else {
val s1_id = Reg(UInt())
val s2_id = Reg(next=s1_id)
io.mem.invalidate_lr := io.requestor.map(_.invalidate_lr).reduce(_||_)
io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_)
io.requestor(0).req.ready := io.mem.req.ready
for (i <- 1 until n)
io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid
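// Priority is by index: requestor 0 sees mem.req.ready directly, and since
// the loop below runs from n-1 down to 0, the lowest-index valid requestor's
// connect_s0() is elaborated last and wins the port. The requestor index is
// appended in the low log2Up(n) tag bits so the response can be routed back
// (and stripped off again in the response loop).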
for (i <- n-1 to 0 by -1) {
val req = io.requestor(i).req
def connect_s0() = {
io.mem.req.bits.cmd := req.bits.cmd
io.mem.req.bits.typ := req.bits.typ
io.mem.req.bits.addr := req.bits.addr
io.mem.req.bits.phys := req.bits.phys
io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n)))
s1_id := UInt(i)
}
def connect_s1() = {
io.mem.s1_kill := io.requestor(i).s1_kill
io.mem.s1_data := io.requestor(i).s1_data
}
if (i == n-1) {
connect_s0()
connect_s1()
} else {
when (req.valid) { connect_s0() }
when (s1_id === UInt(i)) { connect_s1() }
}
}
for (i <- 0 until n) {
val resp = io.requestor(i).resp
val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i)
resp.valid := io.mem.resp.valid && tag_hit
io.requestor(i).xcpt := io.mem.xcpt
io.requestor(i).ordered := io.mem.ordered
io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
resp.bits := io.mem.resp.bits
resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
io.requestor(i).replay_next := io.mem.replay_next
}
}
}
class InOrderArbiter[T <: Data, U <: Data](reqTyp: T, respTyp: U, n: Int)
(implicit p: Parameters) extends Module {
val io = new Bundle {
val in_req = Vec(n, Decoupled(reqTyp)).flip
val in_resp = Vec(n, Decoupled(respTyp))
val out_req = Decoupled(reqTyp)
val out_resp = Decoupled(respTyp).flip
}
if (n > 1) {
val route_q = Module(new Queue(UInt(width = log2Up(n)), 2))
val req_arb = Module(new RRArbiter(reqTyp, n))
req_arb.io.in <> io.in_req
val req_helper = DecoupledHelper(
req_arb.io.out.valid,
route_q.io.enq.ready,
io.out_req.ready)
io.out_req.bits := req_arb.io.out.bits
io.out_req.valid := req_helper.fire(io.out_req.ready)
route_q.io.enq.bits := req_arb.io.chosen
route_q.io.enq.valid := req_helper.fire(route_q.io.enq.ready)
req_arb.io.out.ready := req_helper.fire(req_arb.io.out.valid)
val resp_sel = route_q.io.deq.bits
val resp_ready = io.in_resp(resp_sel).ready
val resp_helper = DecoupledHelper(
resp_ready,
route_q.io.deq.valid,
io.out_resp.valid)
val resp_valid = resp_helper.fire(resp_ready)
for (i <- 0 until n) {
io.in_resp(i).bits := io.out_resp.bits
io.in_resp(i).valid := resp_valid && resp_sel === UInt(i)
}
route_q.io.deq.ready := resp_helper.fire(route_q.io.deq.valid)
io.out_resp.ready := resp_helper.fire(io.out_resp.valid)
} else {
io.out_req <> io.in_req.head
io.in_resp.head <> io.out_resp
}
}


@@ -0,0 +1,82 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Util._
import cde.Parameters
class TDRSelect(implicit p: Parameters) extends CoreBundle()(p) {
val tdrmode = Bool()
val reserved = UInt(width = xLen - 1 - log2Up(nTDR))
val tdrindex = UInt(width = log2Up(nTDR))
def nTDR = p(NBreakpoints)
}
class BPControl(implicit p: Parameters) extends CoreBundle()(p) {
val tdrtype = UInt(width = 4)
val bpamaskmax = UInt(width = 5)
val reserved = UInt(width = xLen-28)
val bpaction = UInt(width = 8)
val bpmatch = UInt(width = 4)
val m = Bool()
val h = Bool()
val s = Bool()
val u = Bool()
val r = Bool()
val w = Bool()
val x = Bool()
def tdrType = 1
def bpaMaskMax = 4
def enabled(mstatus: MStatus) = Cat(m, h, s, u)(mstatus.prv)
}
class BP(implicit p: Parameters) extends CoreBundle()(p) {
val control = new BPControl
val address = UInt(width = vaddrBits)
def mask(dummy: Int = 0) = {
var mask: UInt = control.bpmatch(1)
for (i <- 1 until control.bpaMaskMax)
mask = Cat(mask(i-1) && address(i-1), mask)
mask
}
def pow2AddressMatch(x: UInt) =
(~x | mask()) === (~address | mask())
}
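// Note (added): mask() extends bpmatch(1) through the trailing set bits of
// `address`, yielding a NAPOT (naturally-aligned power-of-two) region. For
// example, with bpmatch(1) = 1 and an address ending in binary ...011, the
// low three bits become don't-cares, so pow2AddressMatch covers an
// 8-byte-aligned region.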
class BreakpointUnit(implicit p: Parameters) extends CoreModule()(p) {
val io = new Bundle {
val status = new MStatus().asInput
val bp = Vec(p(NBreakpoints), new BP).asInput
val pc = UInt(INPUT, vaddrBits)
val ea = UInt(INPUT, vaddrBits)
val xcpt_if = Bool(OUTPUT)
val xcpt_ld = Bool(OUTPUT)
val xcpt_st = Bool(OUTPUT)
}
io.xcpt_if := false
io.xcpt_ld := false
io.xcpt_st := false
for (bp <- io.bp) {
when (bp.control.enabled(io.status)) {
when (bp.pow2AddressMatch(io.pc) && bp.control.x) { io.xcpt_if := true }
when (bp.pow2AddressMatch(io.ea) && bp.control.r) { io.xcpt_ld := true }
when (bp.pow2AddressMatch(io.ea) && bp.control.w) { io.xcpt_st := true }
}
}
if (!io.bp.isEmpty) for ((bpl, bph) <- io.bp zip io.bp.tail) {
def matches(x: UInt) = !(x < bpl.address) && x < bph.address
when (bph.control.enabled(io.status) && bph.control.bpmatch === 1) {
when (matches(io.pc) && bph.control.x) { io.xcpt_if := true }
when (matches(io.ea) && bph.control.r) { io.xcpt_ld := true }
when (matches(io.ea) && bph.control.w) { io.xcpt_st := true }
}
}
}


@@ -0,0 +1,269 @@
// See LICENSE for license details.
package rocket
import Chisel._
import junctions._
import cde.{Parameters, Field}
import Util._
import uncore.util._
case object BtbKey extends Field[BtbParameters]
case class BtbParameters(
nEntries: Int = 62,
nRAS: Int = 2,
updatesOutOfOrder: Boolean = false)
abstract trait HasBtbParameters extends HasCoreParameters {
val matchBits = pgIdxBits
val entries = p(BtbKey).nEntries
val nRAS = p(BtbKey).nRAS
val updatesOutOfOrder = p(BtbKey).updatesOutOfOrder
val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages
val opaqueBits = log2Up(entries)
val nBHT = 1 << log2Up(entries*2)
}
abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters
abstract class BtbBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasBtbParameters
class RAS(nras: Int) {
def push(addr: UInt): Unit = {
when (count < nras) { count := count + 1 }
val nextPos = Mux(Bool(isPow2(nras)) || pos < nras-1, pos+1, UInt(0))
stack(nextPos) := addr
pos := nextPos
}
def peek: UInt = stack(pos)
def pop(): Unit = when (!isEmpty) {
count := count - 1
pos := Mux(Bool(isPow2(nras)) || pos > 0, pos-1, UInt(nras-1))
}
def clear(): Unit = count := UInt(0)
def isEmpty: Bool = count === UInt(0)
private val count = Reg(UInt(width = log2Up(nras+1)))
private val pos = Reg(UInt(width = log2Up(nras)))
private val stack = Reg(Vec(nras, UInt()))
}
class BHTResp(implicit p: Parameters) extends BtbBundle()(p) {
val history = UInt(width = log2Up(nBHT).max(1))
val value = UInt(width = 2)
}
// BHT contains table of 2-bit counters and a global history register.
// The BHT only predicts and updates when there is a BTB hit.
// The global history:
// - updated speculatively in fetch (if there's a BTB hit).
// - on a mispredict, the history register is reset (again, only if BTB hit).
// The counter table:
// - each counter corresponds to the address of the fetch packet ("fetch pc").
// - updated when a branch resolves (and BTB was a hit for that branch).
// The updating branch must provide its "fetch pc".
class BHT(nbht: Int)(implicit val p: Parameters) extends HasCoreParameters {
val nbhtbits = log2Up(nbht)
def get(addr: UInt, update: Bool): BHTResp = {
val res = Wire(new BHTResp)
val index = addr(nbhtbits+1, log2Up(coreInstBytes)) ^ history
res.value := table(index)
res.history := history
val taken = res.value(0)
when (update) { history := Cat(taken, history(nbhtbits-1,1)) }
res
}
def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = {
val index = addr(nbhtbits+1, log2Up(coreInstBytes)) ^ d.history
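// Next state of the 2-bit counter: the MSB becomes the resolved direction,
// and the LSB is a hysteresis bit, set when the old value was strong (both
// bits set) or had any strength and the branch was taken.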
table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken))
when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) }
}
private val table = Mem(nbht, UInt(width = 2))
val history = Reg(UInt(width = nbhtbits))
}
// BTB update occurs during branch resolution (and only on a mispredict).
// - "pc" is what future fetch PCs will tag match against.
// - "br_pc" is the PC of the branch instruction.
class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val prediction = Valid(new BTBResp)
val pc = UInt(width = vaddrBits)
val target = UInt(width = vaddrBits)
val taken = Bool()
val isValid = Bool()
val isJump = Bool()
val isReturn = Bool()
val br_pc = UInt(width = vaddrBits)
}
// BHT update occurs during branch resolution on all conditional branches.
// - "pc" is what future fetch PCs will tag match against.
class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val prediction = Valid(new BTBResp)
val pc = UInt(width = vaddrBits)
val taken = Bool()
val mispredict = Bool()
}
class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val isCall = Bool()
val isReturn = Bool()
val returnAddr = UInt(width = vaddrBits)
val prediction = Valid(new BTBResp)
}
// - "bridx" is the low-order PC bits of the predicted branch (after
// shifting off the lowest log(inst_bytes) bits).
// - "mask" provides a mask of valid instructions (instructions are
// masked off by the predicted taken branch from the BTB).
class BTBResp(implicit p: Parameters) extends BtbBundle()(p) {
val taken = Bool()
val mask = Bits(width = fetchWidth)
val bridx = Bits(width = log2Up(fetchWidth))
val target = UInt(width = vaddrBits)
val entry = UInt(width = opaqueBits)
val bht = new BHTResp
}
class BTBReq(implicit p: Parameters) extends BtbBundle()(p) {
val addr = UInt(width = vaddrBits)
}
// fully-associative branch target buffer
// Higher-performance processors may cause BTB updates to occur out-of-order,
// which requires an extra CAM port for updates (to ensure no duplicates get
placed in the BTB).
class BTB(implicit p: Parameters) extends BtbModule {
val io = new Bundle {
val req = Valid(new BTBReq).flip
val resp = Valid(new BTBResp)
val btb_update = Valid(new BTBUpdate).flip
val bht_update = Valid(new BHTUpdate).flip
val ras_update = Valid(new RASUpdate).flip
}
val idxs = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
val idxPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
val tgts = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits)))
val pageValid = Reg(init = UInt(0, nPages))
val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0))
val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0))
val isValid = Reg(init = UInt(0, entries))
val isReturn = Reg(UInt(width = entries))
val isJump = Reg(UInt(width = entries))
val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth))))
private def page(addr: UInt) = addr >> matchBits
private def pageMatch(addr: UInt) = {
val p = page(addr)
pageValid & pages.map(_ === p).asUInt
}
private def tagMatch(addr: UInt, pgMatch: UInt) = {
val idxMatch = idxs.map(_ === addr(matchBits-1, log2Up(coreInstBytes))).asUInt
val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).asUInt
idxMatch & idxPageMatch & isValid
}
val r_btb_update = Pipe(io.btb_update)
val update_target = io.req.bits.addr
val pageHit = pageMatch(io.req.bits.addr)
val hitsVec = tagMatch(io.req.bits.addr, pageHit)
val hits = hitsVec.asUInt
val updatePageHit = pageMatch(r_btb_update.bits.pc)
val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit)
val updateHit = if (updatesOutOfOrder) updateHits.orR else r_btb_update.bits.prediction.valid
val updateHitAddr = if (updatesOutOfOrder) OHToUInt(updateHits) else r_btb_update.bits.prediction.bits.entry
val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1
val useUpdatePageHit = updatePageHit.orR
val usePageHit = pageHit.orR
val doIdxPageRepl = !useUpdatePageHit
val nextPageRepl = Reg(UInt(width = log2Ceil(nPages)))
val idxPageRepl = Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), UIntToOH(nextPageRepl))
val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl)
val idxPageUpdate = OHToUInt(idxPageUpdateOH)
val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))
val samePage = page(r_btb_update.bits.pc) === page(update_target)
val doTgtPageRepl = !samePage && !usePageHit
val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1)))
val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl))
val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0))
when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) {
val both = doIdxPageRepl && doTgtPageRepl
val next = nextPageRepl + Mux[UInt](both, 2, 1)
nextPageRepl := Mux(next >= nPages, next(0), next)
}
when (r_btb_update.valid) {
val waddr = Mux(updateHit, updateHitAddr, nextRepl)
val mask = UIntToOH(waddr)
idxs(waddr) := r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes))
tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes))
idxPages(waddr) := idxPageUpdate
tgtPages(waddr) := tgtPageUpdate
isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask)
isReturn := Mux(r_btb_update.bits.isReturn, isReturn | mask, isReturn & ~mask)
isJump := Mux(r_btb_update.bits.isJump, isJump | mask, isJump & ~mask)
if (fetchWidth > 1)
brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes)
require(nPages % 2 == 0)
val idxWritesEven = !idxPageUpdate(0)
def writeBank(i: Int, mod: Int, en: UInt, data: UInt) =
for (i <- i until nPages by mod)
when (en(i)) { pages(i) := data }
writeBank(0, 2, Mux(idxWritesEven, idxPageReplEn, tgtPageReplEn),
Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target)))
writeBank(1, 2, Mux(idxWritesEven, tgtPageReplEn, idxPageReplEn),
Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc)))
pageValid := pageValid | tgtPageReplEn | idxPageReplEn
}
io.resp.valid := hits.orR
io.resp.bits.taken := true
io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts) << log2Up(coreInstBytes))
io.resp.bits.entry := OHToUInt(hits)
io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(hitsVec, brIdx) else UInt(0))
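// Valid-instruction mask: when the prediction is taken, set the low bits up
// to and including the predicted branch slot (bridx); when not taken, the
// shift saturates and the whole fetch packet is marked valid.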
io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))
if (nBHT > 0) {
val bht = new BHT(nBHT)
val isBranch = !(hits & isJump).orR
val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
val update_btb_hit = io.bht_update.bits.prediction.valid
when (io.bht_update.valid && update_btb_hit) {
bht.update(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken, io.bht_update.bits.mispredict)
}
when (!res.value(0) && isBranch) { io.resp.bits.taken := false }
io.resp.bits.bht := res
}
if (nRAS > 0) {
val ras = new RAS(nRAS)
val doPeek = (hits & isReturn).orR
when (!ras.isEmpty && doPeek) {
io.resp.bits.target := ras.peek
}
when (io.ras_update.valid) {
when (io.ras_update.bits.isCall) {
ras.push(io.ras_update.bits.returnAddr)
when (doPeek) {
io.resp.bits.target := io.ras_update.bits.returnAddr
}
}.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) {
ras.pop()
}
}
}
}


@@ -0,0 +1,61 @@
// See LICENSE for license details.
package rocket
package constants
import Chisel._
import scala.math._
trait ScalarOpConstants {
val MT_SZ = 3
val MT_X = BitPat("b???")
val MT_B = UInt("b000")
val MT_H = UInt("b001")
val MT_W = UInt("b010")
val MT_D = UInt("b011")
val MT_BU = UInt("b100")
val MT_HU = UInt("b101")
val MT_WU = UInt("b110")
def mtSize(mt: UInt) = mt(MT_SZ-2, 0)
def mtSigned(mt: UInt) = !mt(MT_SZ-1)
val SZ_BR = 3
val BR_X = BitPat("b???")
val BR_EQ = UInt(0, 3)
val BR_NE = UInt(1, 3)
val BR_J = UInt(2, 3)
val BR_N = UInt(3, 3)
val BR_LT = UInt(4, 3)
val BR_GE = UInt(5, 3)
val BR_LTU = UInt(6, 3)
val BR_GEU = UInt(7, 3)
val A1_X = BitPat("b??")
val A1_ZERO = UInt(0, 2)
val A1_RS1 = UInt(1, 2)
val A1_PC = UInt(2, 2)
val IMM_X = BitPat("b???")
val IMM_S = UInt(0, 3)
val IMM_SB = UInt(1, 3)
val IMM_U = UInt(2, 3)
val IMM_UJ = UInt(3, 3)
val IMM_I = UInt(4, 3)
val IMM_Z = UInt(5, 3)
val A2_X = BitPat("b??")
val A2_ZERO = UInt(0, 2)
val A2_SIZE = UInt(1, 2)
val A2_RS2 = UInt(2, 2)
val A2_IMM = UInt(3, 2)
val X = BitPat("b?")
val N = BitPat("b0")
val Y = BitPat("b1")
val SZ_DW = 1
val DW_X = X
val DW_32 = Bool(false)
val DW_64 = Bool(true)
val DW_XPR = DW_64
}


@@ -0,0 +1,589 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Util._
import Instructions._
import cde.{Parameters, Field}
import uncore.devices._
import uncore.util._
import junctions.AddrMap
class MStatus extends Bundle {
val debug = Bool() // not truly part of mstatus, but convenient
val prv = UInt(width = PRV.SZ) // not truly part of mstatus, but convenient
val sd = Bool()
val zero3 = UInt(width = 31)
val sd_rv32 = Bool()
val zero2 = UInt(width = 2)
val vm = UInt(width = 5)
val zero1 = UInt(width = 4)
val mxr = Bool()
val pum = Bool()
val mprv = Bool()
val xs = UInt(width = 2)
val fs = UInt(width = 2)
val mpp = UInt(width = 2)
val hpp = UInt(width = 2)
val spp = UInt(width = 1)
val mpie = Bool()
val hpie = Bool()
val spie = Bool()
val upie = Bool()
val mie = Bool()
val hie = Bool()
val sie = Bool()
val uie = Bool()
}
class DCSR extends Bundle {
val xdebugver = UInt(width = 2)
val ndreset = Bool()
val fullreset = Bool()
val hwbpcount = UInt(width = 12)
val ebreakm = Bool()
val ebreakh = Bool()
val ebreaks = Bool()
val ebreaku = Bool()
val zero2 = Bool()
val stopcycle = Bool()
val stoptime = Bool()
val cause = UInt(width = 3)
val debugint = Bool()
val zero1 = Bool()
val halt = Bool()
val step = Bool()
val prv = UInt(width = PRV.SZ)
}
class MIP extends Bundle {
val rocc = Bool()
val meip = Bool()
val heip = Bool()
val seip = Bool()
val ueip = Bool()
val mtip = Bool()
val htip = Bool()
val stip = Bool()
val utip = Bool()
val msip = Bool()
val hsip = Bool()
val ssip = Bool()
val usip = Bool()
}
class PTBR(implicit p: Parameters) extends CoreBundle()(p) {
require(maxPAddrBits - pgIdxBits + asIdBits <= xLen)
val asid = UInt(width = asIdBits)
val ppn = UInt(width = maxPAddrBits - pgIdxBits)
}
object PRV
{
val SZ = 2
val U = 0
val S = 1
val H = 2
val M = 3
}
object CSR
{
// commands
val SZ = 3
val X = BitPat.dontCare(SZ)
val N = UInt(0,SZ)
val W = UInt(1,SZ)
val S = UInt(2,SZ)
val C = UInt(3,SZ)
val I = UInt(4,SZ)
val R = UInt(5,SZ)
val ADDRSZ = 12
}
class CSRFileIO(implicit p: Parameters) extends CoreBundle {
val prci = new PRCITileIO().flip
val rw = new Bundle {
val addr = UInt(INPUT, CSR.ADDRSZ)
val cmd = Bits(INPUT, CSR.SZ)
val rdata = Bits(OUTPUT, xLen)
val wdata = Bits(INPUT, xLen)
}
val csr_stall = Bool(OUTPUT)
val csr_xcpt = Bool(OUTPUT)
val eret = Bool(OUTPUT)
val singleStep = Bool(OUTPUT)
val status = new MStatus().asOutput
val ptbr = new PTBR().asOutput
val evec = UInt(OUTPUT, vaddrBitsExtended)
val exception = Bool(INPUT)
val retire = UInt(INPUT, log2Up(1+retireWidth))
val custom_mrw_csrs = Vec(nCustomMrwCsrs, UInt(INPUT, xLen))
val cause = UInt(INPUT, xLen)
val pc = UInt(INPUT, vaddrBitsExtended)
val badaddr = UInt(INPUT, vaddrBitsExtended)
val fatc = Bool(OUTPUT)
val time = UInt(OUTPUT, xLen)
val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip
val rocc = new RoCCInterface().flip
val interrupt = Bool(OUTPUT)
val interrupt_cause = UInt(OUTPUT, xLen)
val bp = Vec(p(NBreakpoints), new BP).asOutput
}
class CSRFile(implicit p: Parameters) extends CoreModule()(p)
{
val io = new CSRFileIO
val reset_mstatus = Wire(init=new MStatus().fromBits(0))
reset_mstatus.mpp := PRV.M
reset_mstatus.prv := PRV.M
val reg_mstatus = Reg(init=reset_mstatus)
val new_prv = Wire(init = reg_mstatus.prv)
reg_mstatus.prv := legalizePrivilege(new_prv)
val reset_dcsr = Wire(init=new DCSR().fromBits(0))
reset_dcsr.xdebugver := 1
reset_dcsr.prv := PRV.M
val reg_dcsr = Reg(init=reset_dcsr)
val (supported_interrupts, delegable_interrupts) = {
val sup = Wire(init=new MIP().fromBits(0))
sup.ssip := Bool(p(UseVM))
sup.msip := true
sup.stip := Bool(p(UseVM))
sup.mtip := true
sup.meip := true
sup.seip := Bool(p(UseVM))
sup.rocc := usingRoCC
val del = Wire(init=sup)
del.msip := false
del.mtip := false
del.meip := false
(sup.asUInt, del.asUInt)
}
val delegable_exceptions = UInt(Seq(
Causes.misaligned_fetch,
Causes.fault_fetch,
Causes.breakpoint,
Causes.fault_load,
Causes.fault_store,
Causes.user_ecall).map(1 << _).sum)
val exception = io.exception || io.csr_xcpt
val reg_debug = Reg(init=Bool(false))
val reg_dpc = Reg(UInt(width = vaddrBitsExtended))
val reg_dscratch = Reg(UInt(width = xLen))
val reg_singleStepped = Reg(Bool())
when (io.retire(0) || exception) { reg_singleStepped := true }
when (!io.singleStep) { reg_singleStepped := false }
assert(!io.singleStep || io.retire <= UInt(1))
assert(!reg_singleStepped || io.retire === UInt(0))
val reg_tdrselect = Reg(new TDRSelect)
val reg_bp = Reg(Vec(1 << log2Up(p(NBreakpoints)), new BP))
val reg_mie = Reg(init=UInt(0, xLen))
val reg_mideleg = Reg(init=UInt(0, xLen))
val reg_medeleg = Reg(init=UInt(0, xLen))
val reg_mip = Reg(new MIP)
val reg_mepc = Reg(UInt(width = vaddrBitsExtended))
val reg_mcause = Reg(Bits(width = xLen))
val reg_mbadaddr = Reg(UInt(width = vaddrBitsExtended))
val reg_mscratch = Reg(Bits(width = xLen))
val reg_mtvec = Reg(init=UInt(p(MtvecInit), paddrBits min xLen))
val reg_sepc = Reg(UInt(width = vaddrBitsExtended))
val reg_scause = Reg(Bits(width = xLen))
val reg_sbadaddr = Reg(UInt(width = vaddrBitsExtended))
val reg_sscratch = Reg(Bits(width = xLen))
val reg_stvec = Reg(UInt(width = vaddrBits))
val reg_sptbr = Reg(new PTBR)
val reg_wfi = Reg(init=Bool(false))
val reg_fflags = Reg(UInt(width = 5))
val reg_frm = Reg(UInt(width = 3))
val reg_instret = WideCounter(64, io.retire)
val reg_cycle: UInt = if (enableCommitLog) { reg_instret } else { WideCounter(64) }
val mip = Wire(init=reg_mip)
mip.rocc := io.rocc.interrupt
val read_mip = mip.asUInt & supported_interrupts
val pending_interrupts = read_mip & reg_mie
val m_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.M || (reg_mstatus.prv === PRV.M && reg_mstatus.mie)), pending_interrupts & ~reg_mideleg, UInt(0))
val s_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.S || (reg_mstatus.prv === PRV.S && reg_mstatus.sie)), pending_interrupts & reg_mideleg, UInt(0))
val all_interrupts = m_interrupts | s_interrupts
val interruptMSB = BigInt(1) << (xLen-1)
val interruptCause = interruptMSB + PriorityEncoder(all_interrupts)
io.interrupt := all_interrupts.orR && !io.singleStep || reg_singleStepped
io.interrupt_cause := interruptCause
io.bp := reg_bp take p(NBreakpoints)
val debugIntCause = reg_mip.getWidth
// debug interrupts are only masked by being in debug mode
when (Bool(usingDebug) && reg_dcsr.debugint && !reg_debug) {
io.interrupt := true
io.interrupt_cause := interruptMSB + debugIntCause
}
val system_insn = io.rw.cmd === CSR.I
val cpu_ren = io.rw.cmd =/= CSR.N && !system_insn
val isa_string = "IM" +
(if (usingVM) "S" else "") +
(if (usingUser) "U" else "") +
(if (usingAtomics) "A" else "") +
(if (usingFPU) "FD" else "") +
(if (usingRoCC) "X" else "")
val isa = (BigInt(log2Ceil(xLen) - 4) << (xLen-2)) |
isa_string.map(x => 1 << (x - 'A')).reduce(_|_)
val read_mstatus = io.status.asUInt()(xLen-1,0)
val read_mapping = collection.mutable.LinkedHashMap[Int,Bits](
CSRs.tdrselect -> reg_tdrselect.asUInt,
CSRs.tdrdata1 -> reg_bp(reg_tdrselect.tdrindex).control.asUInt,
CSRs.tdrdata2 -> reg_bp(reg_tdrselect.tdrindex).address,
CSRs.mimpid -> UInt(0),
CSRs.marchid -> UInt(0),
CSRs.mvendorid -> UInt(0),
CSRs.mcycle -> reg_cycle,
CSRs.minstret -> reg_instret,
CSRs.mucounteren -> UInt(0),
CSRs.mutime_delta -> UInt(0),
CSRs.mucycle_delta -> UInt(0),
CSRs.muinstret_delta -> UInt(0),
CSRs.misa -> UInt(isa),
CSRs.mstatus -> read_mstatus,
CSRs.mtvec -> reg_mtvec,
CSRs.mip -> read_mip,
CSRs.mie -> reg_mie,
CSRs.mideleg -> reg_mideleg,
CSRs.medeleg -> reg_medeleg,
CSRs.mscratch -> reg_mscratch,
CSRs.mepc -> reg_mepc.sextTo(xLen),
CSRs.mbadaddr -> reg_mbadaddr.sextTo(xLen),
CSRs.mcause -> reg_mcause,
CSRs.mhartid -> io.prci.id)
if (usingDebug) {
read_mapping += CSRs.dcsr -> reg_dcsr.asUInt
read_mapping += CSRs.dpc -> reg_dpc.asUInt
read_mapping += CSRs.dscratch -> reg_dscratch.asUInt
}
if (usingFPU) {
read_mapping += CSRs.fflags -> reg_fflags
read_mapping += CSRs.frm -> reg_frm
read_mapping += CSRs.fcsr -> Cat(reg_frm, reg_fflags)
}
if (usingVM) {
val read_sie = reg_mie & reg_mideleg
val read_sip = read_mip & reg_mideleg
val read_sstatus = Wire(init=io.status)
read_sstatus.vm := 0
read_sstatus.mprv := 0
read_sstatus.mpp := 0
read_sstatus.hpp := 0
read_sstatus.mpie := 0
read_sstatus.hpie := 0
read_sstatus.mie := 0
read_sstatus.hie := 0
read_mapping += CSRs.sstatus -> (read_sstatus.asUInt())(xLen-1,0)
read_mapping += CSRs.sip -> read_sip.asUInt
read_mapping += CSRs.sie -> read_sie.asUInt
read_mapping += CSRs.sscratch -> reg_sscratch
read_mapping += CSRs.scause -> reg_scause
read_mapping += CSRs.sbadaddr -> reg_sbadaddr.sextTo(xLen)
read_mapping += CSRs.sptbr -> reg_sptbr.asUInt
read_mapping += CSRs.sepc -> reg_sepc.sextTo(xLen)
read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen)
read_mapping += CSRs.mscounteren -> UInt(0)
read_mapping += CSRs.mstime_delta -> UInt(0)
read_mapping += CSRs.mscycle_delta -> UInt(0)
read_mapping += CSRs.msinstret_delta -> UInt(0)
}
if (xLen == 32) {
read_mapping += CSRs.mcycleh -> (reg_cycle >> 32)
read_mapping += CSRs.minstreth -> (reg_instret >> 32)
read_mapping += CSRs.mutime_deltah -> UInt(0)
read_mapping += CSRs.mucycle_deltah -> UInt(0)
read_mapping += CSRs.muinstret_deltah -> UInt(0)
if (usingVM) {
read_mapping += CSRs.mstime_deltah -> UInt(0)
read_mapping += CSRs.mscycle_deltah -> UInt(0)
read_mapping += CSRs.msinstret_deltah -> UInt(0)
}
}
for (i <- 0 until nCustomMrwCsrs) {
val addr = 0xff0 + i
require(addr < (1 << CSR.ADDRSZ))
require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use")
read_mapping += addr -> io.custom_mrw_csrs(i)
}
val decoded_addr = read_mapping map { case (k, v) => k -> (io.rw.addr === k) }
val addr_valid = decoded_addr.values.reduce(_||_)
val fp_csr =
if (usingFPU) decoded_addr(CSRs.fflags) || decoded_addr(CSRs.frm) || decoded_addr(CSRs.fcsr)
else Bool(false)
val csr_debug = Bool(usingDebug) && io.rw.addr(5)
val csr_addr_priv = Cat(io.rw.addr(6,5).andR, io.rw.addr(9,8))
val priv_sufficient = Cat(reg_debug, reg_mstatus.prv) >= csr_addr_priv
val read_only = io.rw.addr(11,10).andR
val cpu_wen = cpu_ren && io.rw.cmd =/= CSR.R && priv_sufficient
val wen = cpu_wen && !read_only
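// One expression implements csrrw/csrrs/csrrc: for set (CSR.S) and clear
// (CSR.C) the old rdata is the base; rs1 bits are then ORed in unless the
// command is clear, and ANDed out when it is.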
val wdata = (Mux(io.rw.cmd.isOneOf(CSR.S, CSR.C), io.rw.rdata, UInt(0)) |
Mux(io.rw.cmd =/= CSR.C, io.rw.wdata, UInt(0))) &
~Mux(io.rw.cmd === CSR.C, io.rw.wdata, UInt(0))
val do_system_insn = priv_sufficient && system_insn
val opcode = UInt(1) << io.rw.addr(2,0)
val insn_call = do_system_insn && opcode(0)
val insn_break = do_system_insn && opcode(1)
val insn_ret = do_system_insn && opcode(2)
val insn_sfence_vm = do_system_insn && opcode(4)
val insn_wfi = do_system_insn && opcode(5)
io.csr_xcpt := (cpu_wen && read_only) ||
(cpu_ren && (!priv_sufficient || !addr_valid || fp_csr && !io.status.fs.orR)) ||
(system_insn && !priv_sufficient) ||
insn_call || insn_break
when (insn_wfi) { reg_wfi := true }
when (pending_interrupts.orR) { reg_wfi := false }
val cause =
Mux(!io.csr_xcpt, io.cause,
Mux(insn_call, reg_mstatus.prv + Causes.user_ecall,
Mux[UInt](insn_break, Causes.breakpoint, Causes.illegal_instruction)))
val cause_lsbs = cause(log2Up(xLen)-1,0)
val causeIsDebugInt = cause(xLen-1) && cause_lsbs === debugIntCause
val causeIsDebugBreak = cause === Causes.breakpoint && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv)
val trapToDebug = Bool(usingDebug) && (reg_singleStepped || causeIsDebugInt || causeIsDebugBreak || reg_debug)
val delegate = Bool(p(UseVM)) && reg_mstatus.prv < PRV.M && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs))
val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800))
val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec))
val epc = Mux(csr_debug, reg_dpc, Mux(Bool(p(UseVM)) && !csr_addr_priv(1), reg_sepc, reg_mepc))
io.fatc := insn_sfence_vm
io.evec := Mux(exception, tvec, epc)
io.ptbr := reg_sptbr
io.eret := insn_ret
io.singleStep := reg_dcsr.step && !reg_debug
io.status := reg_mstatus
io.status.sd := io.status.fs.andR || io.status.xs.andR
io.status.debug := reg_debug
if (xLen == 32)
io.status.sd_rv32 := io.status.sd
when (exception) {
val epc = ~(~io.pc | (coreInstBytes-1))
val pie = read_mstatus(reg_mstatus.prv)
val write_badaddr = cause isOneOf (Causes.breakpoint,
Causes.misaligned_load, Causes.misaligned_store, Causes.misaligned_fetch,
Causes.fault_load, Causes.fault_store, Causes.fault_fetch)
when (trapToDebug) {
reg_debug := true
reg_dpc := epc
reg_dcsr.cause := Mux(reg_singleStepped, UInt(4), Mux(causeIsDebugInt, UInt(3), UInt(1)))
reg_dcsr.prv := trimPrivilege(reg_mstatus.prv)
}.elsewhen (delegate) {
reg_sepc := epc
reg_scause := cause
when (write_badaddr) { reg_sbadaddr := io.badaddr }
reg_mstatus.spie := pie
reg_mstatus.spp := reg_mstatus.prv
reg_mstatus.sie := false
new_prv := PRV.S
}.otherwise {
reg_mepc := epc
reg_mcause := cause
when (write_badaddr) { reg_mbadaddr := io.badaddr }
reg_mstatus.mpie := pie
reg_mstatus.mpp := trimPrivilege(reg_mstatus.prv)
reg_mstatus.mie := false
new_prv := PRV.M
}
}
when (insn_ret) {
when (Bool(p(UseVM)) && !csr_addr_priv(1)) {
when (reg_mstatus.spp.toBool) { reg_mstatus.sie := reg_mstatus.spie }
reg_mstatus.spie := false
reg_mstatus.spp := PRV.U
new_prv := reg_mstatus.spp
}.elsewhen (csr_debug) {
new_prv := reg_dcsr.prv
reg_debug := false
}.otherwise {
when (reg_mstatus.mpp(1)) { reg_mstatus.mie := reg_mstatus.mpie }
.elsewhen (Bool(usingVM) && reg_mstatus.mpp(0)) { reg_mstatus.sie := reg_mstatus.mpie }
reg_mstatus.mpie := false
reg_mstatus.mpp := legalizePrivilege(PRV.U)
new_prv := reg_mstatus.mpp
}
}
assert(PopCount(insn_ret :: io.exception :: io.csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive")
io.time := reg_cycle
io.csr_stall := reg_wfi
io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v)
io.fcsr_rm := reg_frm
when (io.fcsr_flags.valid) {
reg_fflags := reg_fflags | io.fcsr_flags.bits
}
when (wen) {
when (decoded_addr(CSRs.mstatus)) {
val new_mstatus = new MStatus().fromBits(wdata)
reg_mstatus.mie := new_mstatus.mie
reg_mstatus.mpie := new_mstatus.mpie
if (usingUser) {
reg_mstatus.mprv := new_mstatus.mprv
reg_mstatus.mpp := trimPrivilege(new_mstatus.mpp)
if (usingVM) {
reg_mstatus.mxr := new_mstatus.mxr
reg_mstatus.pum := new_mstatus.pum
reg_mstatus.spp := new_mstatus.spp
reg_mstatus.spie := new_mstatus.spie
reg_mstatus.sie := new_mstatus.sie
}
}
if (usingVM) {
require(if (xLen == 32) pgLevels == 2 else pgLevels > 2 && pgLevels < 6)
val vm_on = 6 + pgLevels // TODO Sv48 support should imply Sv39 support
when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 }
when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on }
}
if (usingVM || usingFPU) reg_mstatus.fs := Fill(2, new_mstatus.fs.orR)
if (usingRoCC) reg_mstatus.xs := Fill(2, new_mstatus.xs.orR)
}
when (decoded_addr(CSRs.mip)) {
val new_mip = new MIP().fromBits(wdata)
if (usingVM) {
reg_mip.ssip := new_mip.ssip
reg_mip.stip := new_mip.stip
}
}
when (decoded_addr(CSRs.mie)) { reg_mie := wdata & supported_interrupts }
when (decoded_addr(CSRs.mepc)) { reg_mepc := ~(~wdata | (coreInstBytes-1)) }
when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata }
if (p(MtvecWritable))
when (decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata >> 2 << 2 }
when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ }
when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) }
if (usingFPU) {
when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata }
when (decoded_addr(CSRs.frm)) { reg_frm := wdata }
when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth }
}
if (usingDebug) {
when (decoded_addr(CSRs.dcsr)) {
val new_dcsr = new DCSR().fromBits(wdata)
reg_dcsr.halt := new_dcsr.halt
reg_dcsr.step := new_dcsr.step
reg_dcsr.ebreakm := new_dcsr.ebreakm
if (usingVM) reg_dcsr.ebreaks := new_dcsr.ebreaks
if (usingUser) reg_dcsr.ebreaku := new_dcsr.ebreaku
if (usingUser) reg_dcsr.prv := trimPrivilege(new_dcsr.prv)
}
when (decoded_addr(CSRs.dpc)) { reg_dpc := ~(~wdata | (coreInstBytes-1)) }
when (decoded_addr(CSRs.dscratch)) { reg_dscratch := wdata }
}
if (usingVM) {
when (decoded_addr(CSRs.sstatus)) {
val new_sstatus = new MStatus().fromBits(wdata)
reg_mstatus.sie := new_sstatus.sie
reg_mstatus.spie := new_sstatus.spie
reg_mstatus.spp := new_sstatus.spp
reg_mstatus.pum := new_sstatus.pum
reg_mstatus.fs := Fill(2, new_sstatus.fs.orR) // even without an FPU
if (usingRoCC) reg_mstatus.xs := Fill(2, new_sstatus.xs.orR)
}
when (decoded_addr(CSRs.sip)) {
val new_sip = new MIP().fromBits(wdata)
reg_mip.ssip := new_sip.ssip
}
when (decoded_addr(CSRs.sie)) { reg_mie := (reg_mie & ~reg_mideleg) | (wdata & reg_mideleg) }
when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata }
when (decoded_addr(CSRs.sptbr)) { reg_sptbr.ppn := wdata(ppnBits-1,0) }
when (decoded_addr(CSRs.sepc)) { reg_sepc := ~(~wdata | (coreInstBytes-1)) }
when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata >> 2 << 2 }
when (decoded_addr(CSRs.scause)) { reg_scause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ }
when (decoded_addr(CSRs.sbadaddr)) { reg_sbadaddr := wdata(vaddrBitsExtended-1,0) }
when (decoded_addr(CSRs.mideleg)) { reg_mideleg := wdata & delegable_interrupts }
when (decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata & delegable_exceptions }
}
if (p(NBreakpoints) > 0) {
val newTDR = new TDRSelect().fromBits(wdata)
when (decoded_addr(CSRs.tdrselect)) { reg_tdrselect.tdrindex := newTDR.tdrindex }
when (reg_tdrselect.tdrmode || reg_debug) {
when (decoded_addr(CSRs.tdrdata1)) {
val newBPC = new BPControl().fromBits(wdata)
reg_bp(reg_tdrselect.tdrindex).control := newBPC
reg_bp(reg_tdrselect.tdrindex).control.bpmatch := newBPC.bpmatch & 2 /* exact/NAPOT only */
}
when (decoded_addr(CSRs.tdrdata2)) { reg_bp(reg_tdrselect.tdrindex).address := wdata }
}
}
}
reg_mip := io.prci.interrupts
reg_dcsr.debugint := io.prci.interrupts.debug
reg_dcsr.hwbpcount := UInt(p(NBreakpoints))
reg_sptbr.asid := 0
reg_tdrselect.reserved := 0
reg_tdrselect.tdrmode := true // TODO support D-mode breakpoint theft
if (reg_bp.isEmpty) reg_tdrselect.tdrindex := 0
for (bpc <- reg_bp map {_.control}) {
bpc.tdrtype := bpc.tdrType
bpc.bpamaskmax := bpc.bpaMaskMax
bpc.reserved := 0
bpc.bpaction := 0
bpc.h := false
if (!usingVM) bpc.s := false
if (!usingUser) bpc.u := false
if (!usingVM && !usingUser) bpc.m := true
when (reset) {
bpc.r := false
bpc.w := false
bpc.x := false
}
}
for (bp <- reg_bp drop p(NBreakpoints))
bp := new BP().fromBits(0)
def legalizePrivilege(priv: UInt): UInt =
if (usingVM) Mux(priv === PRV.H, PRV.U, priv)
else if (usingUser) Fill(2, priv(0))
else PRV.M
def trimPrivilege(priv: UInt): UInt =
if (usingVM) priv
else legalizePrivilege(priv)
}


@@ -0,0 +1,444 @@
// See LICENSE for license details.
package rocket
import Chisel._
import junctions._
import uncore.tilelink._
import uncore.agents._
import uncore.coherence._
import uncore.util._
import uncore.constants._
import cde.{Parameters, Field}
import Util._
class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
val addr = Bits(width = untagBits)
val write = Bool()
val wdata = Bits(width = rowBits)
val wmask = Bits(width = rowBytes)
val way_en = Bits(width = nWays)
}
class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val io = new Bundle {
val req = Valid(new DCacheDataReq).flip
val resp = Vec(nWays, Bits(OUTPUT, rowBits))
}
val addr = io.req.bits.addr >> rowOffBits
for (w <- 0 until nWays) {
val array = SeqMem(nSets*refillCycles, Vec(rowBytes, Bits(width=8)))
val valid = io.req.valid && (Bool(nWays == 1) || io.req.bits.way_en(w))
when (valid && io.req.bits.write) {
val data = Vec.tabulate(rowBytes)(i => io.req.bits.wdata(8*(i+1)-1, 8*i))
array.write(addr, data, io.req.bits.wmask.toBools)
}
io.resp(w) := array.read(addr, valid && !io.req.bits.write).asUInt
}
}
class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val io = new Bundle {
val cpu = (new HellaCacheIO).flip
val ptw = new TLBPTWIO()
val mem = new ClientTileLinkIO
}
val fq = Module(new FinishQueue(1))
require(rowBits == encRowBits) // no ECC
require(refillCyclesPerBeat == 1)
require(rowBits >= coreDataBits)
// tags
val replacer = p(Replacer)()
def onReset = L1Metadata(UInt(0), ClientMetadata.onReset)
val meta = Module(new MetadataArray(onReset _))
val metaReadArb = Module(new Arbiter(new MetaReadReq, 3))
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 3))
meta.io.read <> metaReadArb.io.out
meta.io.write <> metaWriteArb.io.out
// data
val data = Module(new DCacheDataArray)
val dataArb = Module(new Arbiter(new DCacheDataReq, 4))
data.io.req <> dataArb.io.out
dataArb.io.out.ready := true
val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
val s1_probe = Reg(next=io.mem.probe.fire(), init=Bool(false))
val probe_bits = RegEnable(io.mem.probe.bits, io.mem.probe.fire())
val s1_nack = Wire(init=Bool(false))
val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR
val s1_valid_not_nacked = s1_valid_masked && !s1_nack
val s1_req = Reg(io.cpu.req.bits)
when (metaReadArb.io.out.valid) {
s1_req := io.cpu.req.bits
s1_req.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaReadArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0))
}
val s1_read = isRead(s1_req.cmd)
val s1_write = isWrite(s1_req.cmd)
val s1_readwrite = s1_read || s1_write
val s1_flush_valid = Reg(Bool())
val s_ready :: s_grant_wait :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 8)
val grant_wait = Reg(init=Bool(false))
val release_ack_wait = Reg(init=Bool(false))
val release_state = Reg(init=s_ready)
val pstore1_valid = Wire(Bool())
val pstore2_valid = Reg(Bool())
val inWriteback = release_state.isOneOf(s_voluntary_writeback, s_probe_rep_dirty)
val releaseWay = Wire(UInt())
io.cpu.req.ready := (release_state === s_ready) && !grant_wait && !s1_nack
// hit initiation path
dataArb.io.in(3).valid := io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)
dataArb.io.in(3).bits.write := false
dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr
dataArb.io.in(3).bits.way_en := ~UInt(0, nWays)
when (!dataArb.io.in(3).ready && isRead(io.cpu.req.bits.cmd)) { io.cpu.req.ready := false }
metaReadArb.io.in(2).valid := io.cpu.req.valid
metaReadArb.io.in(2).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB)
metaReadArb.io.in(2).bits.way_en := ~UInt(0, nWays)
when (!metaReadArb.io.in(2).ready) { io.cpu.req.ready := false }
// address translation
val tlb = Module(new TLB)
io.ptw <> tlb.io.ptw
tlb.io.req.valid := s1_valid_masked && s1_readwrite
tlb.io.req.bits.passthrough := s1_req.phys
tlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits
tlb.io.req.bits.instruction := false
tlb.io.req.bits.store := s1_write
when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false }
when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true }
val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits))
val s1_hit_way = meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt
val s1_hit_state = ClientMetadata.onReset.fromBits(
meta.io.resp.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0)))
.reduce (_|_))
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
val s1_victim_way = Wire(init = replacer.way)
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
val s2_probe = Reg(next=s1_probe, init=Bool(false))
val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready
val s2_valid_masked = s2_valid && Reg(next = !s1_nack)
val s2_req = Reg(io.cpu.req.bits)
val s2_uncached = Reg(Bool())
when (s1_valid_not_nacked || s1_flush_valid) {
s2_req := s1_req
s2_req.addr := s1_paddr
s2_uncached := !tlb.io.resp.cacheable
}
val s2_read = isRead(s2_req.cmd)
val s2_write = isWrite(s2_req.cmd)
val s2_readwrite = s2_read || s2_write
val s2_flush_valid = RegNext(s1_flush_valid)
val s2_data = RegEnable(s1_data, s1_valid || inWriteback)
val s2_probe_way = RegEnable(s1_hit_way, s1_probe)
val s2_probe_state = RegEnable(s1_hit_state, s1_probe)
val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked)
val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked)
val s2_hit = s2_hit_state.isHit(s2_req.cmd)
val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit
val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait
val s2_valid_cached_miss = s2_valid_miss && !s2_uncached
val s2_victimize = s2_valid_cached_miss || s2_flush_valid
val s2_valid_uncached = s2_valid_miss && s2_uncached
val s2_victim_way = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)))
val s2_victim_tag = RegEnable(meta.io.resp(s1_victim_way).tag, s1_valid_not_nacked || s1_flush_valid)
val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(meta.io.resp(s1_victim_way).coh, s1_valid_not_nacked || s1_flush_valid))
val s2_victim_valid = s2_victim_state.isValid()
val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback()
val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd)
val s2_update_meta = s2_hit_state =/= s2_new_hit_state
io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && io.mem.acquire.ready)
when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true }
// exceptions
val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned
io.cpu.xcpt.ma.ld := s1_read && misaligned
io.cpu.xcpt.ma.st := s1_write && misaligned
io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld
io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st
// load reservations
val s2_lr = Bool(usingAtomics) && s2_req.cmd === M_XLR
val s2_sc = Bool(usingAtomics) && s2_req.cmd === M_XSC
val lrscCount = Reg(init=UInt(0))
val lrscValid = lrscCount > 0
val lrscAddr = Reg(UInt())
val s2_sc_fail = s2_sc && !(lrscValid && lrscAddr === (s2_req.addr >> blockOffBits))
when (s2_valid_hit && s2_lr) {
lrscCount := lrscCycles - 1
lrscAddr := s2_req.addr >> blockOffBits
}
when (lrscValid) { lrscCount := lrscCount - 1 }
when ((s2_valid_hit && s2_sc) || io.cpu.invalidate_lr) { lrscCount := 0 }
// pending store buffer
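// Two entries: pstore1 captures a store as it hits in s2, and pstore2 holds
// one more while the data array write port is contended. The drain logic
// below retires them when structurally required (both entries full and
// another store arriving, or pstore1 holding an AMO), opportunistically when
// no incoming load needs the write port, or on a miss/probe/nack.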
val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write)
val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write)
val pstore1_addr = RegEnable(s1_paddr, s1_valid_not_nacked && s1_write)
val pstore1_data = RegEnable(io.cpu.s1_data, s1_valid_not_nacked && s1_write)
val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write)
val pstore1_storegen = new StoreGen(pstore1_typ, pstore1_addr, pstore1_data, wordBytes)
val pstore1_storegen_data = Wire(init = pstore1_storegen.data)
val pstore1_amo = Bool(usingAtomics) && isRead(pstore1_cmd)
val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_amo)
val pstore_drain_opportunistic = !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd))
val pstore_drain_on_miss = releaseInFlight || io.cpu.s2_nack
val pstore_drain =
Bool(usingAtomics) && pstore_drain_structural ||
(((pstore1_valid && !pstore1_amo) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss))
pstore1_valid := {
val s2_store_valid = s2_valid_hit && s2_write && !s2_sc_fail
val pstore1_held = Reg(Bool())
assert(!s2_store_valid || !pstore1_held)
pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain
s2_store_valid || pstore1_held
}
val advance_pstore1 = pstore1_valid && (pstore2_valid === pstore_drain)
pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1
val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1)
val pstore2_way = RegEnable(pstore1_way, advance_pstore1)
val pstore2_storegen_data = RegEnable(pstore1_storegen_data, advance_pstore1)
val pstore2_storegen_mask = RegEnable(pstore1_storegen.mask, advance_pstore1)
dataArb.io.in(0).valid := pstore_drain
dataArb.io.in(0).bits.write := true
dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way)
dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data))
val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits
dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_storegen.mask) << pstore_mask_shift
// store->load RAW hazard detection
val s1_idx = s1_req.addr(idxMSB, wordOffBits)
val s1_raw_hazard = s1_read &&
((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx) ||
(pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx))
when (s1_valid && s1_raw_hazard) { s1_nack := true }
metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty)
metaWriteArb.io.in(0).bits.way_en := s2_victim_way
metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB)
metaWriteArb.io.in(0).bits.data.coh := Mux(s2_hit, s2_new_hit_state, ClientMetadata.onReset)
metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
// acquire
val cachedGetMessage = s2_hit_state.makeAcquire(
client_xact_id = UInt(0),
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
op_code = s2_req.cmd)
val uncachedGetMessage = Get(
client_xact_id = UInt(0),
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
addr_byte = s2_req.addr(beatOffBits-1, 0),
operand_size = s2_req.typ,
alloc = Bool(false))
val uncachedPutOffset = s2_req.addr.extract(beatOffBits-1, wordOffBits)
val uncachedPutMessage = Put(
client_xact_id = UInt(0),
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
data = Fill(beatWords, pstore1_storegen.data),
wmask = Some(pstore1_storegen.mask << (uncachedPutOffset << wordOffBits)),
alloc = Bool(false))
val uncachedPutAtomicMessage = PutAtomic(
client_xact_id = UInt(0),
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
addr_byte = s2_req.addr(beatOffBits-1, 0),
atomic_opcode = s2_req.cmd,
operand_size = s2_req.typ,
data = Fill(beatWords, pstore1_storegen.data))
io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || s2_valid_uncached) && fq.io.enq.ready
io.mem.acquire.bits := cachedGetMessage
when (s2_uncached) {
assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access")
io.mem.acquire.bits := uncachedGetMessage
when (s2_write) {
io.mem.acquire.bits := uncachedPutMessage
when (pstore1_amo) {
io.mem.acquire.bits := uncachedPutAtomicMessage
}
}
}
when (io.mem.acquire.fire()) { grant_wait := true }
// grant
val grantIsRefill = io.mem.grant.bits.hasMultibeatData()
val grantIsVoluntary = io.mem.grant.bits.isVoluntary()
val grantIsUncached = !grantIsRefill && !grantIsVoluntary
when (io.mem.grant.valid) {
assert(grant_wait || grantIsVoluntary && release_ack_wait, "unexpected grant")
when (grantIsUncached) { s2_data := io.mem.grant.bits.data }
when (grantIsVoluntary) { release_ack_wait := false }
}
val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles)
val grantDone = refillDone || grantIsUncached
when (io.mem.grant.fire() && grantDone) { grant_wait := false }
// data refill
dataArb.io.in(1).valid := grantIsRefill && io.mem.grant.valid
io.mem.grant.ready := true
assert(dataArb.io.in(1).ready || !dataArb.io.in(1).valid)
dataArb.io.in(1).bits.write := true
dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits
dataArb.io.in(1).bits.way_en := s2_victim_way
dataArb.io.in(1).bits.wdata := io.mem.grant.bits.data
dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes)
// tag updates on refill
metaWriteArb.io.in(1).valid := refillDone
assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready)
metaWriteArb.io.in(1).bits.way_en := s2_victim_way
metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB)
metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd)
metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
// finish
fq.io.enq.valid := io.mem.grant.fire() && io.mem.grant.bits.requiresAck() && (!grantIsRefill || refillDone)
fq.io.enq.bits := io.mem.grant.bits.makeFinish()
io.mem.finish <> fq.io.deq
when (fq.io.enq.valid) { assert(fq.io.enq.ready) }
when (refillDone) { replacer.miss }
// probe
val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr)
metaReadArb.io.in(1).valid := io.mem.probe.valid && !block_probe
io.mem.probe.ready := metaReadArb.io.in(1).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit)
metaReadArb.io.in(1).bits.idx := io.mem.probe.bits.addr_block
metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays)
// release
val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles)
val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback)
val releaseRejected = io.mem.release.valid && !io.mem.release.ready
val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire())
val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected)
val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid))
io.mem.release.valid := s2_release_data_valid
io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits)
val voluntaryReleaseMessage = s2_victim_state.makeVoluntaryWriteback(UInt(0), UInt(0))
val voluntaryNewCoh = s2_victim_state.onCacheControl(M_FLUSH)
val probeResponseMessage = s2_probe_state.makeRelease(probe_bits)
val probeNewCoh = s2_probe_state.onProbe(probe_bits)
val newCoh = Wire(init = probeNewCoh)
releaseWay := s2_probe_way
when (s2_victimize && s2_victim_dirty) {
assert(!s2_hit_state.isValid())
release_state := s_voluntary_writeback
probe_bits.addr_block := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB))
}
when (s2_probe) {
when (s2_probe_state.requiresVoluntaryWriteback()) { release_state := s_probe_rep_dirty }
.elsewhen (s2_probe_state.isValid()) { release_state := s_probe_rep_clean }
.otherwise {
io.mem.release.valid := true
release_state := s_probe_rep_miss
}
}
when (releaseDone) { release_state := s_ready }
when (release_state.isOneOf(s_probe_rep_miss, s_probe_rep_clean)) {
io.mem.release.valid := true
}
when (release_state.isOneOf(s_probe_rep_clean, s_probe_rep_dirty)) {
io.mem.release.bits := probeResponseMessage
when (releaseDone) { release_state := s_probe_write_meta }
}
when (release_state.isOneOf(s_voluntary_writeback, s_voluntary_write_meta)) {
io.mem.release.bits := voluntaryReleaseMessage
newCoh := voluntaryNewCoh
releaseWay := s2_victim_way
when (releaseDone) {
release_state := s_voluntary_write_meta
release_ack_wait := true
}
}
when (s2_probe && !io.mem.release.fire()) { s1_nack := true }
io.mem.release.bits.addr_block := probe_bits.addr_block
io.mem.release.bits.addr_beat := writebackCount
io.mem.release.bits.data := s2_data
dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles
dataArb.io.in(2).bits.write := false
dataArb.io.in(2).bits.addr := Cat(io.mem.release.bits.addr_block, releaseDataBeat(log2Up(refillCycles)-1,0)) << rowOffBits
dataArb.io.in(2).bits.way_en := ~UInt(0, nWays)
metaWriteArb.io.in(2).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta)
metaWriteArb.io.in(2).bits.way_en := releaseWay
metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB)
metaWriteArb.io.in(2).bits.data.coh := newCoh
metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits)
when (metaWriteArb.io.in(2).fire()) { release_state := s_ready }
// cached response
io.cpu.resp.valid := s2_valid_hit
io.cpu.resp.bits := s2_req
io.cpu.resp.bits.has_data := s2_read
io.cpu.resp.bits.replay := false
io.cpu.ordered := !(s1_valid || s2_valid || grant_wait)
// uncached response
io.cpu.replay_next := io.mem.grant.valid && grantIsUncached
val doUncachedResp = Reg(next = io.cpu.replay_next)
when (doUncachedResp) {
assert(!s2_valid_hit)
io.cpu.resp.valid := true
io.cpu.resp.bits.replay := true
}
// load data subword mux/sign extension
val s2_word_idx = s2_req.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes))
val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits)))
val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word, s2_sc, wordBytes)
io.cpu.resp.bits.data := loadgen.data | s2_sc_fail
io.cpu.resp.bits.data_word_bypass := loadgen.wordData
io.cpu.resp.bits.store_data := pstore1_data
// AMOs
if (usingAtomics) {
val amoalu = Module(new AMOALU(xLen))
amoalu.io.addr := pstore1_addr
amoalu.io.cmd := pstore1_cmd
amoalu.io.typ := pstore1_typ
amoalu.io.lhs := s2_data_word
amoalu.io.rhs := pstore1_data
pstore1_storegen_data := amoalu.io.out
} else {
assert(!(s1_valid_masked && s1_read && s1_write), "unsupported D$ operation")
}
// flushes
val flushed = Reg(init=Bool(true))
val flushing = Reg(init=Bool(false))
val flushCounter = Counter(nSets * nWays)
when (io.mem.acquire.fire()) { flushed := false }
when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
io.cpu.s2_nack := !flushed
when (!flushed) {
flushing := !release_ack_wait
}
}
s1_flush_valid := metaReadArb.io.in(0).fire() && !s1_flush_valid && !s2_flush_valid && release_state === s_ready && !release_ack_wait
metaReadArb.io.in(0).valid := flushing
metaReadArb.io.in(0).bits.idx := flushCounter.value
metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays)
when (flushing) {
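// A worked decomposition (assuming nSets and nWays are powers of two): the
// flush counter enumerates every (way, set) pair, with the low log2(nSets)
// bits indexing the set and the high bits selecting the way, so with
// nSets = 64 and nWays = 4 a count of 0x47 touches set 7 of way 1.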
s1_victim_way := flushCounter.value >> log2Up(nSets)
when (s2_flush_valid) {
when (flushCounter.inc()) {
flushed := true
}
}
when (flushed && release_state === s_ready && !release_ack_wait) {
flushing := false
}
}
}

@@ -0,0 +1,203 @@
// See LICENSE for license details.
package rocket
import Chisel._
object DecodeLogic
{
def term(lit: BitPat) =
new Term(lit.value, BigInt(2).pow(lit.getWidth)-(lit.mask+1))
def logic(addr: UInt, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bool], terms: Seq[Term]) = {
terms.map { t =>
cache.getOrElseUpdate(t, (if (t.mask == 0) addr else addr & Bits(BigInt(2).pow(addrWidth)-(t.mask+1), addrWidth)) === Bits(t.value, addrWidth))
}.foldLeft(Bool(false))(_||_)
}
def apply(addr: UInt, default: BitPat, mapping: Iterable[(BitPat, BitPat)]): UInt = {
val cache = caches.getOrElseUpdate(addr, collection.mutable.Map[Term,Bool]())
val dterm = term(default)
val (keys, values) = mapping.unzip
val addrWidth = keys.map(_.getWidth).max
val terms = keys.toList.map(k => term(k))
val termvalues = terms zip values.toList.map(term(_))
for (t <- keys.zip(terms).tails; if !t.isEmpty)
for (u <- t.tail)
assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap")
Cat((0 until default.getWidth.max(values.map(_.getWidth).max)).map({ case (i: Int) =>
val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1)
val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1)
val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1)
if (((dterm.mask >> i) & 1) != 0) {
logic(addr, addrWidth, cache, SimplifyDC(mint, maxt, addrWidth))
} else {
val defbit = (dterm.value.toInt >> i) & 1
val t = if (defbit == 0) mint else maxt
val bit = logic(addr, addrWidth, cache, Simplify(t, dc, addrWidth))
if (defbit == 0) bit else ~bit
}
}).reverse)
}
def apply(addr: UInt, default: Seq[BitPat], mappingIn: Iterable[(BitPat, Seq[BitPat])]): Seq[UInt] = {
val mapping = collection.mutable.ArrayBuffer.fill(default.size)(collection.mutable.ArrayBuffer[(BitPat, BitPat)]())
for ((key, values) <- mappingIn)
for ((value, i) <- values zipWithIndex)
mapping(i) += key -> value
for ((thisDefault, thisMapping) <- default zip mapping)
yield apply(addr, thisDefault, thisMapping)
}
def apply(addr: UInt, default: Seq[BitPat], mappingIn: List[(UInt, Seq[BitPat])]): Seq[UInt] =
apply(addr, default, mappingIn.map(m => (BitPat(m._1), m._2)).asInstanceOf[Iterable[(BitPat, Seq[BitPat])]])
def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool =
apply(addr, BitPat.dontCare(1), trues.map(BitPat(_) -> BitPat("b1")) ++ falses.map(BitPat(_) -> BitPat("b0"))).toBool
private val caches = collection.mutable.Map[UInt,collection.mutable.Map[Term,Bool]]()
}
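// A minimal usage sketch (illustrative only; `DecodeExample` is not part of
// the original source): map a 2-bit opcode onto a 3-bit control word and let
// DecodeLogic minimize the resulting gates. Keys must not overlap, which the
// assertion in apply checks.
class DecodeExample extends Module {
  val io = new Bundle {
    val opcode = UInt(INPUT, 2)
    val ctrl = UInt(OUTPUT, 3)
  }
  io.ctrl := DecodeLogic(io.opcode, BitPat("b???"), Seq(
    BitPat("b00") -> BitPat("b001"),
    BitPat("b01") -> BitPat("b010"),
    BitPat("b1?") -> BitPat("b100")))
}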
class Term(val value: BigInt, val mask: BigInt = 0)
{
var prime = true
def covers(x: Term) = ((value ^ x.value) &~ mask | x.mask &~ mask) == 0
def intersects(x: Term) = ((value ^ x.value) &~ mask &~ x.mask) == 0
override def equals(that: Any) = that match {
case x: Term => x.value == value && x.mask == mask
case _ => false
}
override def hashCode = value.toInt
def < (that: Term) = value < that.value || value == that.value && mask < that.mask
def similar(x: Term) = {
val diff = value - x.value
mask == x.mask && value > x.value && (diff & diff-1) == 0
}
def merge(x: Term) = {
prime = false
x.prime = false
val bit = value - x.value
new Term(value &~ bit, mask | bit)
}
override def toString = value.toString(16) + "-" + mask.toString(16) + (if (prime) "p" else "")
}
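// Worked example (illustrative): Term(value = 0b100, mask = 0b010) is the
// cube "1?0". It covers Term(0b110, 0) and Term(0b100, 0) but not
// Term(0b101, 0). Terms "100" and "110" are `similar` (same mask, exactly
// one differing bit); merging them clears that bit into the mask, yielding "1?0".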
object Simplify
{
def getPrimeImplicants(implicants: Seq[Term], bits: Int) = {
var prime = List[Term]()
implicants.foreach(_.prime = true)
val cols = (0 to bits).map(b => implicants.filter(b == _.mask.bitCount))
val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set(c.filter(b == _.value.bitCount):_*)))
for (i <- 0 to bits) {
for (j <- 0 until bits-i)
table(i)(j).foreach(a => table(i+1)(j) ++= table(i)(j+1).filter(_.similar(a)).map(_.merge(a)))
for (r <- table(i))
for (p <- r; if p.prime)
prime = p :: prime
}
prime.sortWith(_<_)
}
def getEssentialPrimeImplicants(prime: Seq[Term], minterms: Seq[Term]): (Seq[Term],Seq[Term],Seq[Term]) = {
for (i <- 0 until prime.size) {
val icover = minterms.filter(prime(i) covers _)
for (j <- 0 until prime.size) {
val jcover = minterms.filter(prime(j) covers _)
if (icover.size > jcover.size && jcover.forall(prime(i) covers _))
return getEssentialPrimeImplicants(prime.filter(_ != prime(j)), minterms)
}
}
val essentiallyCovered = minterms.filter(t => prime.count(_ covers t) == 1)
val essential = prime.filter(p => essentiallyCovered.exists(p covers _))
val nonessential = prime.filterNot(essential contains _)
val uncovered = minterms.filterNot(t => essential.exists(_ covers t))
if (essential.isEmpty || uncovered.isEmpty)
(essential, nonessential, uncovered)
else {
val (a, b, c) = getEssentialPrimeImplicants(nonessential, uncovered)
(essential ++ a, b, c)
}
}
def getCost(cover: Seq[Term], bits: Int) = cover.map(bits - _.mask.bitCount).sum
def cheaper(a: List[Term], b: List[Term], bits: Int) = {
val ca = getCost(a, bits)
val cb = getCost(b, bits)
def listLess(a: List[Term], b: List[Term]): Boolean = !b.isEmpty && (a.isEmpty || a.head < b.head || a.head == b.head && listLess(a.tail, b.tail))
ca < cb || ca == cb && listLess(a.sortWith(_<_), b.sortWith(_<_))
}
def getCover(implicants: Seq[Term], minterms: Seq[Term], bits: Int) = {
if (minterms.nonEmpty) {
val cover = minterms.map(m => implicants.filter(_.covers(m)).map(i => collection.mutable.Set(i)))
val all = cover.reduceLeft((c0, c1) => c0.map(a => c1.map(_ ++ a)).reduceLeft(_++_))
all.map(_.toList).reduceLeft((a, b) => if (cheaper(a, b, bits)) a else b)
} else
Seq[Term]()
}
def stringify(s: Seq[Term], bits: Int) = s.map(t => (0 until bits).map(i => if ((t.mask & (1 << i)) != 0) "x" else ((t.value >> i) & 1).toString).reduceLeft(_+_).reverse).reduceLeft(_+" + "+_)
def apply(minterms: Seq[Term], dontcares: Seq[Term], bits: Int) = {
val prime = getPrimeImplicants(minterms ++ dontcares, bits)
minterms.foreach(t => assert(prime.exists(_.covers(t))))
val (eprime, prime2, uncovered) = getEssentialPrimeImplicants(prime, minterms)
val cover = eprime ++ getCover(prime2, uncovered, bits)
minterms.foreach(t => assert(cover.exists(_.covers(t)))) // sanity check
cover
}
}
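// Illustrative run over 2 bits: minterms {00, 01}, don't-care {11}.
// getPrimeImplicants merges 00/01 into "0?" and 01/11 into "?1"; "0?" is
// essential (only it covers 00) and also covers 01, so the cover is just
// "0?", i.e. the function reduces to !bit1.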
object SimplifyDC
{
def getImplicitDC(maxterms: Seq[Term], term: Term, bits: Int, above: Boolean): Term = {
for (i <- 0 until bits) {
var t: Term = null
if (above && ((term.value | term.mask) & (BigInt(1) << i)) == 0)
t = new Term(term.value | (BigInt(1) << i), term.mask)
else if (!above && (term.value & (BigInt(1) << i)) != 0)
t = new Term(term.value & ~(BigInt(1) << i), term.mask)
if (t != null && !maxterms.exists(_.intersects(t)))
return t
}
null
}
def getPrimeImplicants(minterms: Seq[Term], maxterms: Seq[Term], bits: Int) = {
var prime = List[Term]()
minterms.foreach(_.prime = true)
var mint = minterms.map(t => new Term(t.value, t.mask))
val cols = (0 to bits).map(b => mint.filter(b == _.mask.bitCount))
val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set(c.filter(b == _.value.bitCount):_*)))
for (i <- 0 to bits) {
for (j <- 0 until bits-i) {
table(i)(j).foreach(a => table(i+1)(j) ++= table(i)(j+1).filter(_ similar a).map(_ merge a))
}
for (j <- 0 until bits-i) {
for (a <- table(i)(j).filter(_.prime)) {
val dc = getImplicitDC(maxterms, a, bits, true)
if (dc != null)
table(i+1)(j) += dc merge a
}
for (a <- table(i)(j+1).filter(_.prime)) {
val dc = getImplicitDC(maxterms, a, bits, false)
if (dc != null)
table(i+1)(j) += a merge dc
}
}
for (r <- table(i))
for (p <- r; if p.prime)
prime = p :: prime
}
prime.sortWith(_<_)
}
def verify(cover: Seq[Term], minterms: Seq[Term], maxterms: Seq[Term]) = {
assert(minterms.forall(t => cover.exists(_ covers t)))
assert(maxterms.forall(t => !cover.exists(_ intersects t)))
}
def apply(minterms: Seq[Term], maxterms: Seq[Term], bits: Int) = {
val prime = getPrimeImplicants(minterms, maxterms, bits)
val (eprime, prime2, uncovered) = Simplify.getEssentialPrimeImplicants(prime, minterms)
val cover = eprime ++ Simplify.getCover(prime2, uncovered, bits)
verify(cover, minterms, maxterms)
cover
}
}

@@ -0,0 +1,96 @@
// See LICENSE for license details.
package rocket
import Chisel._
import cde.{Parameters, Field}
import Instructions._
object ALU
{
val SZ_ALU_FN = 4
val FN_X = BitPat("b????")
val FN_ADD = UInt(0)
val FN_SL = UInt(1)
val FN_SEQ = UInt(2)
val FN_SNE = UInt(3)
val FN_XOR = UInt(4)
val FN_SR = UInt(5)
val FN_OR = UInt(6)
val FN_AND = UInt(7)
val FN_SUB = UInt(10)
val FN_SRA = UInt(11)
val FN_SLT = UInt(12)
val FN_SGE = UInt(13)
val FN_SLTU = UInt(14)
val FN_SGEU = UInt(15)
val FN_DIV = FN_XOR
val FN_DIVU = FN_SR
val FN_REM = FN_OR
val FN_REMU = FN_AND
val FN_MUL = FN_ADD
val FN_MULH = FN_SL
val FN_MULHSU = FN_SLT
val FN_MULHU = FN_SLTU
def isMulFN(fn: UInt, cmp: UInt) = fn(1,0) === cmp(1,0)
def isSub(cmd: UInt) = cmd(3)
def isCmp(cmd: UInt) = cmd === FN_SEQ || cmd === FN_SNE || cmd >= FN_SLT
def cmpUnsigned(cmd: UInt) = cmd(1)
def cmpInverted(cmd: UInt) = cmd(0)
def cmpEq(cmd: UInt) = !cmd(3)
}
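// The encodings make these properties fall out of single bit tests:
// FN_SUB = 10 = 0b1010 has bit 3 set (isSub); FN_SLTU = 14 = 0b1110 has
// bit 1 set (unsigned compare) and bit 0 clear, while FN_SGEU = 15 flips
// bit 0 to invert FN_SLTU's result.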
import ALU._
class ALU(implicit p: Parameters) extends CoreModule()(p) {
val io = new Bundle {
val dw = Bits(INPUT, SZ_DW)
val fn = Bits(INPUT, SZ_ALU_FN)
val in2 = UInt(INPUT, xLen)
val in1 = UInt(INPUT, xLen)
val out = UInt(OUTPUT, xLen)
val adder_out = UInt(OUTPUT, xLen)
val cmp_out = Bool(OUTPUT)
}
// ADD, SUB
val in2_inv = Mux(isSub(io.fn), ~io.in2, io.in2)
val in1_xor_in2 = io.in1 ^ in2_inv
io.adder_out := io.in1 + in2_inv + isSub(io.fn)
// SLT, SLTU
io.cmp_out := cmpInverted(io.fn) ^
Mux(cmpEq(io.fn), in1_xor_in2 === UInt(0),
Mux(io.in1(xLen-1) === io.in2(xLen-1), io.adder_out(xLen-1),
Mux(cmpUnsigned(io.fn), io.in2(xLen-1), io.in1(xLen-1))))
// SLL, SRL, SRA
val (shamt, shin_r) =
if (xLen == 32) (io.in2(4,0), io.in1)
else {
require(xLen == 64)
val shin_hi_32 = Fill(32, isSub(io.fn) && io.in1(31))
val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32)
val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0))
(shamt, Cat(shin_hi, io.in1(31,0)))
}
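// One funnel shifter serves SLL, SRL and SRA: for left shifts the operand
// is bit-reversed, shifted right, and reversed back. E.g. over 4 bits,
// 0b0001 << 2 becomes Reverse(0b0001) = 0b1000, then 0b1000 >> 2 = 0b0010,
// reversed again to 0b0100.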
val shin = Mux(io.fn === FN_SR || io.fn === FN_SRA, shin_r, Reverse(shin_r))
val shout_r = (Cat(isSub(io.fn) & shin(xLen-1), shin).asSInt >> shamt)(xLen-1,0)
val shout_l = Reverse(shout_r)
val shout = Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, UInt(0)) |
Mux(io.fn === FN_SL, shout_l, UInt(0))
// AND, OR, XOR
val logic = Mux(io.fn === FN_XOR || io.fn === FN_OR, in1_xor_in2, UInt(0)) |
Mux(io.fn === FN_OR || io.fn === FN_AND, io.in1 & io.in2, UInt(0))
val shift_logic = (isCmp(io.fn) && io.cmp_out) | logic | shout
val out = Mux(io.fn === FN_ADD || io.fn === FN_SUB, io.adder_out, shift_logic)
io.out := out
if (xLen > 32) {
require(xLen == 64)
when (io.dw === DW_32) { io.out := Cat(Fill(32, out(31)), out(31,0)) }
}
}

@@ -0,0 +1,654 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Instructions._
import Util._
import FPConstants._
import uncore.constants.MemoryOpConstants._
import uncore.util._
import cde.{Parameters, Field}
case class FPUConfig(
divSqrt: Boolean = true,
sfmaLatency: Int = 2,
dfmaLatency: Int = 3
)
object FPConstants
{
val FCMD_ADD = BitPat("b0??00")
val FCMD_SUB = BitPat("b0??01")
val FCMD_MUL = BitPat("b0??10")
val FCMD_MADD = BitPat("b1??00")
val FCMD_MSUB = BitPat("b1??01")
val FCMD_NMSUB = BitPat("b1??10")
val FCMD_NMADD = BitPat("b1??11")
val FCMD_DIV = BitPat("b?0011")
val FCMD_SQRT = BitPat("b?1011")
val FCMD_SGNJ = BitPat("b??1?0")
val FCMD_MINMAX = BitPat("b?01?1")
val FCMD_CVT_FF = BitPat("b??0??")
val FCMD_CVT_IF = BitPat("b?10??")
val FCMD_CMP = BitPat("b?01??")
val FCMD_MV_XF = BitPat("b?11??")
val FCMD_CVT_FI = BitPat("b??0??")
val FCMD_MV_FX = BitPat("b??1??")
val FCMD_X = BitPat("b?????")
val FCMD_WIDTH = 5
val RM_SZ = 3
val FLAGS_SZ = 5
}
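// Note that several FCMD patterns alias (e.g. FCMD_CVT_FF and FCMD_CVT_FI
// are both b??0??); a cmd value is only examined by the functional unit it
// was dispatched to, so disambiguation comes from the unit, not the pattern.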
class FPUCtrlSigs extends Bundle
{
val cmd = Bits(width = FCMD_WIDTH)
val ldst = Bool()
val wen = Bool()
val ren1 = Bool()
val ren2 = Bool()
val ren3 = Bool()
val swap12 = Bool()
val swap23 = Bool()
val single = Bool()
val fromint = Bool()
val toint = Bool()
val fastpipe = Bool()
val fma = Bool()
val div = Bool()
val sqrt = Bool()
val round = Bool()
val wflags = Bool()
}
class FPUDecoder extends Module
{
val io = new Bundle {
val inst = Bits(INPUT, 32)
val sigs = new FPUCtrlSigs().asOutput
}
val decoder = DecodeLogic(io.inst,
List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X),
Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N,N),
FLD -> List(FCMD_X, Y,Y,N,N,N,X,X,N,N,N,N,N,N,N,N,N),
FSW -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,Y,N,Y,N,N,N,N,N,N),
FSD -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,N,N,Y,N,N,N,N,N,N),
FMV_S_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,N),
FMV_D_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,N),
FCVT_S_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FCVT_S_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FCVT_S_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FCVT_S_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
FCVT_D_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FCVT_D_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FCVT_D_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FCVT_D_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
FMV_X_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
FMV_X_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
FCLASS_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
FCLASS_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
FCVT_W_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FCVT_WU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FCVT_L_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FCVT_LU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
FCVT_W_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_WU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_L_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_LU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
FCVT_S_D -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,Y,N,N,Y,N,N,N,Y,Y),
FCVT_D_S -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,N,N,N,Y,N,N,N,Y,Y),
FEQ_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
FLT_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
FLE_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
FEQ_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
FLT_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
FLE_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
FSGNJ_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
FSGNJN_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
FSGNJX_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
FSGNJN_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,Y,N,N,N,Y,N,N,Y,Y),
FADD_D -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
FSUB_D -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
FMUL_D -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y,Y),
FMADD_S -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FMSUB_S -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FNMADD_S -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FNMSUB_S -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
FMADD_D -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FMSUB_D -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FNMADD_D -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FNMSUB_D -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
FDIV_S -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,Y,N,N,N,N,Y,N,Y,Y),
FSQRT_S -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,Y,N,N,N,N,N,Y,Y,Y),
FDIV_D -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,Y,Y),
FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y)
))
val s = io.sigs
val sigs = Seq(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12,
s.swap23, s.single, s.fromint, s.toint, s.fastpipe, s.fma,
s.div, s.sqrt, s.round, s.wflags)
sigs zip decoder map {case(s,d) => s := d}
}
class FPUIO(implicit p: Parameters) extends CoreBundle {
val inst = Bits(INPUT, 32)
val fromint_data = Bits(INPUT, xLen)
val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ)
val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ))
val store_data = Bits(OUTPUT, 64)
val toint_data = Bits(OUTPUT, xLen)
val dmem_resp_val = Bool(INPUT)
val dmem_resp_type = Bits(INPUT, 3)
val dmem_resp_tag = UInt(INPUT, 5)
val dmem_resp_data = Bits(INPUT, 64)
val valid = Bool(INPUT)
val fcsr_rdy = Bool(OUTPUT)
val nack_mem = Bool(OUTPUT)
val illegal_rm = Bool(OUTPUT)
val killx = Bool(INPUT)
val killm = Bool(INPUT)
val dec = new FPUCtrlSigs().asOutput
val sboard_set = Bool(OUTPUT)
val sboard_clr = Bool(OUTPUT)
val sboard_clra = UInt(OUTPUT, 5)
val cp_req = Decoupled(new FPInput()).flip // the coprocessor port doesn't respect kill signals
val cp_resp = Decoupled(new FPResult())
}
class FPResult extends Bundle
{
val data = Bits(width = 65)
val exc = Bits(width = 5)
}
class FPInput extends FPUCtrlSigs {
val rm = Bits(width = 3)
val typ = Bits(width = 2)
val in1 = Bits(width = 65)
val in2 = Bits(width = 65)
val in3 = Bits(width = 65)
}
object ClassifyRecFN {
def apply(expWidth: Int, sigWidth: Int, in: UInt) = {
val sign = in(sigWidth + expWidth)
val exp = in(sigWidth + expWidth - 1, sigWidth - 1)
val sig = in(sigWidth - 2, 0)
val code = exp(expWidth,expWidth-2)
val codeHi = code(2, 1)
val isSpecial = codeHi === UInt(3)
val isHighSubnormalIn = exp(expWidth-2, 0) < UInt(2)
val isSubnormal = code === UInt(1) || codeHi === UInt(1) && isHighSubnormalIn
val isNormal = codeHi === UInt(1) && !isHighSubnormalIn || codeHi === UInt(2)
val isZero = code === UInt(0)
val isInf = isSpecial && !exp(expWidth-2)
val isNaN = code.andR
val isSNaN = isNaN && !sig(sigWidth-2)
val isQNaN = isNaN && sig(sigWidth-2)
Cat(isQNaN, isSNaN, isInf && !sign, isNormal && !sign,
isSubnormal && !sign, isZero && !sign, isZero && sign,
isSubnormal && sign, isNormal && sign, isInf && sign)
}
}
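// The ten result bits follow the RISC-V FCLASS encoding, from bit 9 down to
// bit 0: quiet NaN, signaling NaN, +inf, +normal, +subnormal, +0, -0,
// -subnormal, -normal, -inf.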
class FPToInt extends Module
{
val io = new Bundle {
val in = Valid(new FPInput).flip
val as_double = new FPInput().asOutput
val out = Valid(new Bundle {
val lt = Bool()
val store = Bits(width = 64)
val toint = Bits(width = 64)
val exc = Bits(width = 5)
})
}
val in = Reg(new FPInput)
val valid = Reg(next=io.in.valid)
def upconvert(x: UInt) = {
val s2d = Module(new hardfloat.RecFNToRecFN(8, 24, 11, 53))
s2d.io.in := x
s2d.io.roundingMode := UInt(0)
s2d.io.out
}
val in1_upconvert = upconvert(io.in.bits.in1)
val in2_upconvert = upconvert(io.in.bits.in2)
when (io.in.valid) {
in := io.in.bits
when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd =/= FCMD_MV_XF) {
in.in1 := in1_upconvert
in.in2 := in2_upconvert
}
}
val unrec_s = hardfloat.fNFromRecFN(8, 24, in.in1)
val unrec_d = hardfloat.fNFromRecFN(11, 53, in.in1)
val unrec_out = Mux(in.single, Cat(Fill(32, unrec_s(31)), unrec_s), unrec_d)
val classify_s = ClassifyRecFN(8, 24, in.in1)
val classify_d = ClassifyRecFN(11, 53, in.in1)
val classify_out = Mux(in.single, classify_s, classify_d)
val dcmp = Module(new hardfloat.CompareRecFN(11, 53))
dcmp.io.a := in.in1
dcmp.io.b := in.in2
dcmp.io.signaling := Bool(true)
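// The low rm bits select the comparison predicate: ~rm masks the (lt, eq)
// pair before OR-reduction, so rm = 0 yields lt|eq (FLE), rm = 1 yields lt
// (FLT), and rm = 2 yields eq (FEQ).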
val dcmp_out = (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR
val dcmp_exc = dcmp.io.exceptionFlags
val d2l = Module(new hardfloat.RecFNToIN(11, 53, 64))
val d2w = Module(new hardfloat.RecFNToIN(11, 53, 32))
d2l.io.in := in.in1
d2l.io.roundingMode := in.rm
d2l.io.signedOut := ~in.typ(0)
d2w.io.in := in.in1
d2w.io.roundingMode := in.rm
d2w.io.signedOut := ~in.typ(0)
io.out.bits.toint := Mux(in.rm(0), classify_out, unrec_out)
io.out.bits.store := unrec_out
io.out.bits.exc := Bits(0)
when (in.cmd === FCMD_CMP) {
io.out.bits.toint := dcmp_out
io.out.bits.exc := dcmp_exc
}
when (in.cmd === FCMD_CVT_IF) {
io.out.bits.toint := Mux(in.typ(1), d2l.io.out.asSInt, d2w.io.out.asSInt).asUInt
val dflags = Mux(in.typ(1), d2l.io.intExceptionFlags, d2w.io.intExceptionFlags)
io.out.bits.exc := Cat(dflags(2, 1).orR, UInt(0, 3), dflags(0))
}
io.out.valid := valid
io.out.bits.lt := dcmp.io.lt
io.as_double := in
}
class IntToFP(val latency: Int) extends Module
{
val io = new Bundle {
val in = Valid(new FPInput).flip
val out = Valid(new FPResult)
}
val in = Pipe(io.in)
val mux = Wire(new FPResult)
mux.exc := Bits(0)
mux.data := hardfloat.recFNFromFN(11, 53, in.bits.in1)
when (in.bits.single) {
mux.data := Cat(SInt(-1, 32), hardfloat.recFNFromFN(8, 24, in.bits.in1))
}
val longValue =
Mux(in.bits.typ(1), in.bits.in1.asSInt,
Mux(in.bits.typ(0), in.bits.in1(31,0).zext, in.bits.in1(31,0).asSInt))
val l2s = Module(new hardfloat.INToRecFN(64, 8, 24))
l2s.io.signedIn := ~in.bits.typ(0)
l2s.io.in := longValue.asUInt
l2s.io.roundingMode := in.bits.rm
val l2d = Module(new hardfloat.INToRecFN(64, 11, 53))
l2d.io.signedIn := ~in.bits.typ(0)
l2d.io.in := longValue.asUInt
l2d.io.roundingMode := in.bits.rm
when (in.bits.cmd === FCMD_CVT_FI) {
when (in.bits.single) {
mux.data := Cat(SInt(-1, 32), l2s.io.out)
mux.exc := l2s.io.exceptionFlags
}.otherwise {
mux.data := l2d.io.out
mux.exc := l2d.io.exceptionFlags
}
}
io.out <> Pipe(in.valid, mux, latency-1)
}
class FPToFP(val latency: Int) extends Module
{
val io = new Bundle {
val in = Valid(new FPInput).flip
val out = Valid(new FPResult)
val lt = Bool(INPUT) // from FPToInt
}
val in = Pipe(io.in)
// fp->fp units
val isSgnj = in.bits.cmd === FCMD_SGNJ
def fsgnjSign(in1: Bits, in2: Bits, pos: Int, en: Bool, rm: Bits) =
Mux(rm(1) || !en, in1(pos), rm(0)) ^ (en && in2(pos))
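// rm distinguishes the sign-injection flavors: rm = 00 copies in2's sign
// (FSGNJ), rm = 01 copies its complement (FSGNJN), and rm(1) = 1 XORs the
// two signs (FSGNJX); with en low the operand keeps its own sign.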
val sign_s = fsgnjSign(in.bits.in1, in.bits.in2, 32, in.bits.single && isSgnj, in.bits.rm)
val sign_d = fsgnjSign(in.bits.in1, in.bits.in2, 64, !in.bits.single && isSgnj, in.bits.rm)
val fsgnj = Cat(sign_d, in.bits.in1(63,33), sign_s, in.bits.in1(31,0))
val s2d = Module(new hardfloat.RecFNToRecFN(8, 24, 11, 53))
val d2s = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
s2d.io.in := in.bits.in1
s2d.io.roundingMode := in.bits.rm
d2s.io.in := in.bits.in1
d2s.io.roundingMode := in.bits.rm
val isnan1 = Mux(in.bits.single, in.bits.in1(31,29).andR, in.bits.in1(63,61).andR)
val isnan2 = Mux(in.bits.single, in.bits.in2(31,29).andR, in.bits.in2(63,61).andR)
val issnan1 = isnan1 && ~Mux(in.bits.single, in.bits.in1(22), in.bits.in1(51))
val issnan2 = isnan2 && ~Mux(in.bits.single, in.bits.in2(22), in.bits.in2(51))
val minmax_exc = Cat(issnan1 || issnan2, Bits(0,4))
val isMax = in.bits.rm(0)
val isLHS = isnan2 || isMax =/= io.lt && !isnan1
val mux = Wire(new FPResult)
mux.exc := minmax_exc
mux.data := in.bits.in2
when (isSgnj) { mux.exc := UInt(0) }
when (isSgnj || isLHS) { mux.data := fsgnj }
when (in.bits.cmd === FCMD_CVT_FF) {
when (in.bits.single) {
mux.data := Cat(SInt(-1, 32), d2s.io.out)
mux.exc := d2s.io.exceptionFlags
}.otherwise {
mux.data := s2d.io.out
mux.exc := s2d.io.exceptionFlags
}
}
io.out <> Pipe(in.valid, mux, latency-1)
}
class FPUFMAPipe(val latency: Int, expWidth: Int, sigWidth: Int) extends Module
{
val io = new Bundle {
val in = Valid(new FPInput).flip
val out = Valid(new FPResult)
}
val width = sigWidth + expWidth
val one = UInt(1) << (width-1)
val zero = (io.in.bits.in1(width) ^ io.in.bits.in2(width)) << width
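// In the recoded format 1.0 is a lone bit at position width-1, and `zero`
// is a zero carrying the product's sign, so FADD/FSUB become a*1 + c and
// FMUL becomes a*b + (signed) 0, reusing the single fused datapath.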
val valid = Reg(next=io.in.valid)
val in = Reg(new FPInput)
when (io.in.valid) {
in := io.in.bits
val cmd_fma = io.in.bits.ren3
val cmd_addsub = io.in.bits.swap23
in.cmd := Cat(io.in.bits.cmd(1) & (cmd_fma || cmd_addsub), io.in.bits.cmd(0))
when (cmd_addsub) { in.in2 := one }
unless (cmd_fma || cmd_addsub) { in.in3 := zero }
}
val fma = Module(new hardfloat.MulAddRecFN(expWidth, sigWidth))
fma.io.op := in.cmd
fma.io.roundingMode := in.rm
fma.io.a := in.in1
fma.io.b := in.in2
fma.io.c := in.in3
val res = Wire(new FPResult)
res.data := Cat(SInt(-1, 32), fma.io.out)
res.exc := fma.io.exceptionFlags
io.out := Pipe(valid, res, latency-1)
}
class FPU(cfg: FPUConfig)(implicit p: Parameters) extends CoreModule()(p) {
require(xLen == 64, "RV32 Rocket FP support missing")
val io = new FPUIO
val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
val req_valid = ex_reg_valid || io.cp_req.valid
val ex_reg_inst = RegEnable(io.inst, io.valid)
val ex_cp_valid = io.cp_req.valid && !ex_reg_valid
val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx || ex_cp_valid, init=Bool(false))
val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
val mem_cp_valid = Reg(next=ex_cp_valid, init=Bool(false))
val killm = (io.killm || io.nack_mem) && !mem_cp_valid
val wb_reg_valid = Reg(next=mem_reg_valid && (!killm || mem_cp_valid), init=Bool(false))
val wb_cp_valid = Reg(next=mem_cp_valid, init=Bool(false))
val fp_decoder = Module(new FPUDecoder)
fp_decoder.io.inst := io.inst
val cp_ctrl = Wire(new FPUCtrlSigs)
cp_ctrl <> io.cp_req.bits
io.cp_resp.valid := Bool(false)
io.cp_resp.bits.data := UInt(0)
val id_ctrl = fp_decoder.io.sigs
val ex_ctrl = Mux(ex_reg_valid, RegEnable(id_ctrl, io.valid), cp_ctrl)
val mem_ctrl = RegEnable(ex_ctrl, req_valid)
val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)
// load response
val load_wb = Reg(next=io.dmem_resp_val)
val load_wb_single = RegEnable(!io.dmem_resp_type(0), io.dmem_resp_val)
val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val)
val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val)
val rec_s = hardfloat.recFNFromFN(8, 24, load_wb_data)
val rec_d = hardfloat.recFNFromFN(11, 53, load_wb_data)
val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d)
// regfile
val regfile = Mem(32, Bits(width = 65))
when (load_wb) {
regfile(load_wb_tag) := load_wb_data_recoded
if (enableCommitLog) {
printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32),
Mux(load_wb_single, load_wb_data(31,0), load_wb_data))
}
}
val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt()))
when (io.valid) {
when (id_ctrl.ren1) {
when (!id_ctrl.swap12) { ex_ra1 := io.inst(19,15) }
when (id_ctrl.swap12) { ex_ra2 := io.inst(19,15) }
}
when (id_ctrl.ren2) {
when (id_ctrl.swap12) { ex_ra1 := io.inst(24,20) }
when (id_ctrl.swap23) { ex_ra3 := io.inst(24,20) }
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.inst(24,20) }
}
when (id_ctrl.ren3) { ex_ra3 := io.inst(31,27) }
}
val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_))
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
val cp_rs1 = io.cp_req.bits.in1
val cp_rs2 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in3, io.cp_req.bits.in2)
val cp_rs3 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in2, io.cp_req.bits.in3)
val req = Wire(new FPInput)
req := ex_ctrl
req.rm := Mux(ex_reg_valid, ex_rm, io.cp_req.bits.rm)
req.in1 := Mux(ex_reg_valid, ex_rs1, cp_rs1)
req.in2 := Mux(ex_reg_valid, ex_rs2, cp_rs2)
req.in3 := Mux(ex_reg_valid, ex_rs3, cp_rs3)
req.typ := Mux(ex_reg_valid, ex_reg_inst(21,20), io.cp_req.bits.typ)
val sfma = Module(new FPUFMAPipe(cfg.sfmaLatency, 8, 24))
sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single
sfma.io.in.bits := req
val dfma = Module(new FPUFMAPipe(cfg.dfmaLatency, 11, 53))
dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single
dfma.io.in.bits := req
val fpiu = Module(new FPToInt)
fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX)
fpiu.io.in.bits := req
io.store_data := fpiu.io.out.bits.store
io.toint_data := fpiu.io.out.bits.toint
when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
io.cp_resp.bits.data := fpiu.io.out.bits.toint
io.cp_resp.valid := Bool(true)
}
val ifpu = Module(new IntToFP(3))
ifpu.io.in.valid := req_valid && ex_ctrl.fromint
ifpu.io.in.bits := req
ifpu.io.in.bits.in1 := Mux(ex_reg_valid, io.fromint_data, cp_rs1)
val fpmu = Module(new FPToFP(2))
fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
fpmu.io.in.bits := req
fpmu.io.lt := fpiu.io.out.bits.lt
val divSqrt_wen = Reg(next=Bool(false))
val divSqrt_inReady = Wire(init=Bool(false))
val divSqrt_waddr = Reg(Bits())
val divSqrt_wdata = Wire(Bits())
val divSqrt_flags = Wire(Bits())
val divSqrt_in_flight = Reg(init=Bool(false))
val divSqrt_killed = Reg(Bool())
// writeback arbitration
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult)
val pipes = List(
Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits),
Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits),
Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out.bits),
Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits))
def latencyMask(c: FPUCtrlSigs, offset: Int) = {
require(pipes.forall(_.lat >= offset))
pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_)
}
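// Example (default FPUConfig): a double-precision FMA has latency 3, so
// memLatencyMask = latencyMask(mem_ctrl, 2) sets bit 1 of the `wen` shift
// register below; one cycle later that bit reaches bit 0 and the result is
// written back.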
def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), UInt(p._2), UInt(0))).reduce(_|_)
val maxLatency = pipes.map(_.lat).max
val memLatencyMask = latencyMask(mem_ctrl, 2)
class WBInfo extends Bundle {
val rd = UInt(width = 5)
val single = Bool()
val cp = Bool()
val pipeid = UInt(width = log2Ceil(pipes.size))
override def cloneType: this.type = new WBInfo().asInstanceOf[this.type]
}
val wen = Reg(init=Bits(0, maxLatency-1))
val wbInfo = Reg(Vec(maxLatency-1, new WBInfo))
val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid)
for (i <- 0 until maxLatency-2) {
when (wen(i+1)) { wbInfo(i) := wbInfo(i+1) }
}
wen := wen >> 1
when (mem_wen) {
when (!killm) {
wen := wen >> 1 | memLatencyMask
}
for (i <- 0 until maxLatency-1) {
when (!write_port_busy && memLatencyMask(i)) {
wbInfo(i).cp := mem_cp_valid
wbInfo(i).single := mem_ctrl.single
wbInfo(i).pipeid := pipeid(mem_ctrl)
wbInfo(i).rd := mem_reg_inst(11,7)
}
}
}
val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd)
val wdata = Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wbInfo(0).pipeid))
val wexc = (pipes.map(_.res.exc): Seq[UInt])(wbInfo(0).pipeid)
when ((!wbInfo(0).cp && wen(0)) || divSqrt_wen) {
regfile(waddr) := wdata
if (enableCommitLog) {
val wdata_unrec_s = hardfloat.fNFromRecFN(8, 24, wdata(64,0))
val wdata_unrec_d = hardfloat.fNFromRecFN(11, 53, wdata(64,0))
printf ("f%d p%d 0x%x\n", waddr, waddr+ UInt(32),
Mux(wbInfo(0).single, Cat(UInt(0,32), wdata_unrec_s), wdata_unrec_d))
}
}
when (wbInfo(0).cp && wen(0)) {
io.cp_resp.bits.data := wdata
io.cp_resp.valid := Bool(true)
}
io.cp_req.ready := !ex_reg_valid
val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
io.fcsr_flags.bits :=
Mux(wb_toint_valid, wb_toint_exc, UInt(0)) |
Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
Mux(wen(0), wexc, UInt(0))
val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR) // || mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid))
io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
io.dec <> fp_decoder.io.sigs
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wbInfo(0).pipeid === UInt(x._2))))
io.sboard_clra := waddr
// we don't currently support round-max-magnitude (rm=4)
io.illegal_rm := ex_rm(2) && ex_ctrl.round
divSqrt_wdata := 0
divSqrt_flags := 0
if (cfg.divSqrt) {
val divSqrt_single = Reg(Bool())
val divSqrt_rm = Reg(Bits())
val divSqrt_flags_double = Reg(Bits())
val divSqrt_wdata_double = Reg(Bits())
val divSqrt = Module(new hardfloat.DivSqrtRecF64)
divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div)
val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt
divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_in_flight
divSqrt.io.sqrtOp := mem_ctrl.sqrt
divSqrt.io.a := fpiu.io.as_double.in1
divSqrt.io.b := fpiu.io.as_double.in2
divSqrt.io.roundingMode := fpiu.io.as_double.rm
when (divSqrt.io.inValid && divSqrt_inReady) {
divSqrt_in_flight := true
divSqrt_killed := killm
divSqrt_single := mem_ctrl.single
divSqrt_waddr := mem_reg_inst(11,7)
divSqrt_rm := divSqrt.io.roundingMode
}
when (divSqrt_outValid) {
divSqrt_wen := !divSqrt_killed
divSqrt_wdata_double := divSqrt.io.out
divSqrt_in_flight := false
divSqrt_flags_double := divSqrt.io.exceptionFlags
}
val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
divSqrt_toSingle.io.in := divSqrt_wdata_double
divSqrt_toSingle.io.roundingMode := divSqrt_rm
divSqrt_wdata := Mux(divSqrt_single, divSqrt_toSingle.io.out, divSqrt_wdata_double)
divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0))
} else {
when (ex_ctrl.div || ex_ctrl.sqrt) { io.illegal_rm := true }
}
}

@@ -0,0 +1,133 @@
package rocket
import Chisel._
import uncore.tilelink._
import Util._
import cde.{Parameters, Field}
class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) {
val pc = UInt(width = vaddrBitsExtended)
val speculative = Bool()
}
class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
val btb = Valid(new BTBResp)
val pc = UInt(width = vaddrBitsExtended) // ID stage PC
val data = UInt(width = fetchWidth * coreInstBits)
val mask = Bits(width = fetchWidth)
val xcpt_if = Bool()
val replay = Bool()
}
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Valid(new FrontendReq)
val resp = Decoupled(new FrontendResp).flip
val btb_update = Valid(new BTBUpdate)
val bht_update = Valid(new BHTUpdate)
val ras_update = Valid(new RASUpdate)
val flush_icache = Bool(OUTPUT)
val flush_tlb = Bool(OUTPUT)
val npc = UInt(INPUT, width = vaddrBitsExtended)
}
class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
val io = new Bundle {
val cpu = new FrontendIO().flip
val ptw = new TLBPTWIO()
val mem = new ClientUncachedTileLinkIO
}
val icache = Module(new ICache(latency = 2))
val tlb = Module(new TLB)
val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBs (this propagates down the pipeline)
val s1_speculative = Reg(Bool())
val s1_same_block = Reg(Bool())
val s2_valid = Reg(init=Bool(true))
val s2_pc = Reg(init=UInt(p(ResetVector)))
val s2_btb_resp_valid = Reg(init=Bool(false))
val s2_btb_resp_bits = Reg(new BTBResp)
val s2_xcpt_if = Reg(init=Bool(false))
val s2_speculative = Reg(init=Bool(false))
val s2_cacheable = Reg(init=Bool(false))
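// ntpc rounds s1_pc down to the fetch-group boundary and adds one group:
// with fetchWidth = 2 and 4-byte instructions, pc = 0x1004 gives
// 0x1004 & ~7 = 0x1000, so ntpc = 0x1008.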
val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
val ntpc_same_block = (ntpc & rowBytes) === (s1_pc & rowBytes)
val predicted_npc = Wire(init = ntpc)
val predicted_taken = Wire(init = Bool(false))
val icmiss = s2_valid && !icache.io.resp.valid
val npc = Mux(icmiss, s2_pc, predicted_npc)
val s0_same_block = !predicted_taken && !icmiss && !io.cpu.req.valid && ntpc_same_block
val stall = io.cpu.resp.valid && !io.cpu.resp.ready
when (!stall) {
s1_same_block := s0_same_block && !tlb.io.resp.miss
s1_pc_ := io.cpu.npc
// consider RVC fetches across blocks to be non-speculative if the first
// part was non-speculative
val s0_speculative =
if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken
else Bool(true)
s1_speculative := Mux(icmiss, s2_speculative, s0_speculative)
s2_valid := !icmiss
when (!icmiss) {
s2_pc := s1_pc
s2_speculative := s1_speculative
s2_cacheable := tlb.io.resp.cacheable
s2_xcpt_if := tlb.io.resp.xcpt_if
}
}
when (io.cpu.req.valid) {
s1_same_block := Bool(false)
s1_pc_ := io.cpu.npc
s1_speculative := io.cpu.req.bits.speculative
s2_valid := Bool(false)
}
if (p(BtbKey).nEntries > 0) {
val btb = Module(new BTB)
btb.io.req.valid := false
btb.io.req.bits.addr := s1_pc_
btb.io.btb_update := io.cpu.btb_update
btb.io.bht_update := io.cpu.bht_update
btb.io.ras_update := io.cpu.ras_update
when (!stall && !icmiss) {
btb.io.req.valid := true
s2_btb_resp_valid := btb.io.resp.valid
s2_btb_resp_bits := btb.io.resp.bits
}
when (btb.io.resp.valid && btb.io.resp.bits.taken) {
predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended)
predicted_taken := Bool(true)
}
}
io.ptw <> tlb.io.ptw
tlb.io.req.valid := !stall && !icmiss
tlb.io.req.bits.vpn := s1_pc >> pgIdxBits
tlb.io.req.bits.passthrough := Bool(false)
tlb.io.req.bits.instruction := Bool(true)
tlb.io.req.bits.store := Bool(false)
io.mem <> icache.io.mem
icache.io.req.valid := !stall && !s0_same_block
icache.io.req.bits.addr := io.cpu.npc
icache.io.invalidate := io.cpu.flush_icache
icache.io.s1_ppn := tlb.io.resp.ppn
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb
icache.io.s2_kill := s2_speculative && !s2_cacheable
icache.io.resp.ready := !stall && !s1_same_block
io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill || s2_xcpt_if)
io.cpu.resp.bits.pc := s2_pc
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
require(fetchWidth * coreInstBytes <= rowBytes && isPow2(fetchWidth))
io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc.extract(log2Ceil(rowBytes)-1,log2Ceil(fetchWidth*coreInstBytes)) << log2Ceil(fetchWidth*coreInstBits))
io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
io.cpu.resp.bits.xcpt_if := s2_xcpt_if
io.cpu.resp.bits.replay := icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt_if
io.cpu.resp.bits.btb.valid := s2_btb_resp_valid
io.cpu.resp.bits.btb.bits := s2_btb_resp_bits
}

@@ -0,0 +1,132 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Util._
import cde.{Parameters, Field}
import junctions._
class Instruction(implicit val p: Parameters) extends ParameterizedBundle with HasCoreParameters {
val pf0 = Bool() // page fault on first half of instruction
val pf1 = Bool() // page fault on second half of instruction
val replay = Bool()
val btb_hit = Bool()
val rvc = Bool()
val inst = new ExpandedInstruction
require(coreInstBits == (if (usingCompressed) 16 else 32))
}
class IBuf(implicit p: Parameters) extends CoreModule {
val io = new Bundle {
val imem = Decoupled(new FrontendResp).flip
val kill = Bool(INPUT)
val pc = UInt(width = vaddrBitsExtended)
val btb_resp = new BTBResp().asOutput
val inst = Vec(retireWidth, Decoupled(new Instruction))
}
// This module is meant to be more general, but it's not there yet
require(decodeWidth == 1)
val n = fetchWidth - 1
val nBufValid = if (n == 0) UInt(0) else Reg(init=UInt(0, log2Ceil(fetchWidth)))
val buf = Reg(io.imem.bits)
val ibufBTBHit = Reg(Bool())
val ibufBTBResp = Reg(new BTBResp)
val pcWordMask = UInt(coreInstBytes*fetchWidth-1, vaddrBitsExtended)
val pcWordBits = io.imem.bits.pc.extract(log2Ceil(fetchWidth*coreInstBytes)-1, log2Ceil(coreInstBytes))
val nReady = Wire(init = UInt(0, log2Ceil(fetchWidth+1)))
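// nIC counts the instruction slots the I$ can supply this cycle (fewer if
// the BTB redirects mid-group); nICReady is how many of those the pipeline
// can consume once the buffer drains. imem is ready only when the buffer
// empties and any leftover I$ slots fit in the n-entry buffer.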
val nIC = Mux(io.imem.bits.btb.valid && io.imem.bits.btb.bits.taken, io.imem.bits.btb.bits.bridx +& 1, UInt(fetchWidth)) - pcWordBits
val nICReady = nReady - nBufValid
val nValid = Mux(io.imem.valid, nIC, UInt(0)) + nBufValid
io.imem.ready := nReady >= nBufValid && (nICReady >= nIC || n >= nIC - nICReady)
if (n > 0) {
nBufValid := Mux(nReady >= nBufValid, UInt(0), nBufValid - nReady)
if (n > 1) when (nReady > 0 && nReady < nBufValid) {
val shiftedBuf = shiftInsnRight(buf.data(n*coreInstBits-1, coreInstBits), (nReady-1)(log2Ceil(n-1)-1,0))
buf.data := Cat(buf.data(n*coreInstBits-1, (n-1)*coreInstBits), shiftedBuf((n-1)*coreInstBits-1, 0))
buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask
ibufBTBResp.bridx := ibufBTBResp.bridx - nReady
}
when (io.imem.valid && nReady >= nBufValid && nICReady < nIC && n >= nIC - nICReady) {
val shamt = pcWordBits + nICReady
nBufValid := nIC - nICReady
buf := io.imem.bits
buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n*coreInstBits-1,0)
buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask
ibufBTBHit := io.imem.bits.btb.valid
when (io.imem.bits.btb.valid) {
ibufBTBResp := io.imem.bits.btb.bits
ibufBTBResp.bridx := io.imem.bits.btb.bits.bridx + nICReady
}
}
when (io.kill) {
nBufValid := 0
}
}
val icShiftAmt = (fetchWidth + nBufValid - pcWordBits)(log2Ceil(fetchWidth), 0)
val icData = shiftInsnLeft(Cat(io.imem.bits.data, Fill(fetchWidth, io.imem.bits.data(coreInstBits-1, 0))), icShiftAmt)
.extract(3*fetchWidth*coreInstBits-1, 2*fetchWidth*coreInstBits)
val icMask = (~UInt(0, fetchWidth*coreInstBits) << (nBufValid << log2Ceil(coreInstBits)))(fetchWidth*coreInstBits-1,0)
val inst = icData & icMask | buf.data & ~icMask
val valid = (UIntToOH(nValid) - 1)(fetchWidth-1, 0)
val bufMask = UIntToOH(nBufValid) - 1
val xcpt_if = valid & (Mux(buf.xcpt_if, bufMask, UInt(0)) | Mux(io.imem.bits.xcpt_if, ~bufMask, UInt(0)))
val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0)))
val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0))
val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0))
val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask
io.btb_resp := Mux((ibufBTBHitMask & bufMask).orR, ibufBTBResp, io.imem.bits.btb.bits)
io.pc := Mux(nBufValid > 0, buf.pc, io.imem.bits.pc)
expand(0, 0, inst)
def expand(i: Int, j: UInt, curInst: UInt): Unit = if (i < retireWidth) {
val exp = Module(new RVCExpander)
exp.io.in := curInst
io.inst(i).bits.inst := exp.io.out
if (usingCompressed) {
val replay = ic_replay(j) || (!exp.io.rvc && (btbHitMask(j) || ic_replay(j+1)))
io.inst(i).valid := valid(j) && (exp.io.rvc || valid(j+1) || xcpt_if(j+1) || replay)
io.inst(i).bits.pf0 := xcpt_if(j)
io.inst(i).bits.pf1 := !exp.io.rvc && xcpt_if(j+1)
io.inst(i).bits.replay := replay
io.inst(i).bits.btb_hit := btbHitMask(j) || (!exp.io.rvc && btbHitMask(j+1))
io.inst(i).bits.rvc := exp.io.rvc
when (io.inst(i).fire()) { nReady := Mux(exp.io.rvc, j+1, j+2) }
expand(i+1, Mux(exp.io.rvc, j+1, j+2), Mux(exp.io.rvc, curInst >> 16, curInst >> 32))
} else {
when (io.inst(i).ready) { nReady := i+1 }
io.inst(i).valid := valid(i)
io.inst(i).bits.pf0 := xcpt_if(i)
io.inst(i).bits.pf1 := false
io.inst(i).bits.replay := ic_replay(i)
io.inst(i).bits.rvc := false
io.inst(i).bits.btb_hit := btbHitMask(i)
expand(i+1, null, curInst >> 32)
}
}
def shiftInsnLeft(in: UInt, dist: UInt) = {
val r = in.getWidth/coreInstBits
require(in.getWidth % coreInstBits == 0)
val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r-1)*coreInstBits), in)
data << (dist << log2Ceil(coreInstBits))
}
def shiftInsnRight(in: UInt, dist: UInt) = {
val r = in.getWidth/coreInstBits
require(in.getWidth % coreInstBits == 0)
val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r-1)*coreInstBits), in)
data >> (dist << log2Ceil(coreInstBits))
}
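// Both shifters pad the vector to a power-of-two number of instruction
// slots by replicating the top slot, so `dist` (counted in instruction
// slots, not bits) never shifts in undefined data.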
}

@@ -0,0 +1,157 @@
package rocket
import Chisel._
import uncore.agents._
import uncore.tilelink._
import uncore.util._
import Util._
import cde.{Parameters, Field}
trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters {
val outerDataBeats = p(TLKey(p(TLId))).dataBeats
val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat
val refillCyclesPerBeat = outerDataBits/rowBits
val refillCycles = refillCyclesPerBeat*outerDataBeats
}
class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters {
val addr = UInt(width = vaddrBits)
}
class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters {
val data = Bits(width = coreInstBits)
val datablock = Bits(width = rowBits)
}
class ICache(latency: Int)(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
val io = new Bundle {
val req = Valid(new ICacheReq).flip
val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
val resp = Decoupled(new ICacheResp)
val invalidate = Bool(INPUT)
val mem = new ClientUncachedTileLinkIO
}
require(isPow2(nSets) && isPow2(nWays))
require(isPow2(coreInstBytes))
require(!usingVM || pgIdxBits >= untagBits)
val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(UInt(), 4)
val state = Reg(init=s_ready)
val invalidated = Reg(Bool())
val stall = !io.resp.ready
val rdy = Wire(Bool())
val refill_addr = Reg(UInt(width = paddrBits))
val s1_any_tag_hit = Wire(Bool())
val s1_valid = Reg(init=Bool(false))
val s1_vaddr = Reg(UInt())
val s1_paddr = Cat(io.s1_ppn, s1_vaddr(pgIdxBits-1,0))
val s1_tag = s1_paddr(tagBits+untagBits-1,untagBits)
val s0_valid = io.req.valid || s1_valid && stall
val s0_vaddr = Mux(s1_valid && stall, s1_vaddr, io.req.bits.addr)
s1_valid := io.req.valid && rdy || s1_valid && stall && !io.s1_kill
when (io.req.valid && rdy) {
s1_vaddr := io.req.bits.addr
}
val out_valid = s1_valid && !io.s1_kill && state === s_ready
val s1_idx = s1_vaddr(untagBits-1,blockOffBits)
val s1_hit = out_valid && s1_any_tag_hit
val s1_miss = out_valid && !s1_any_tag_hit
rdy := state === s_ready && !s1_miss
when (s1_miss && state === s_ready) {
refill_addr := s1_paddr
}
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat)
val (refill_cnt, refill_wrap) = Counter(narrow_grant.fire(), refillCycles)
val refill_done = state === s_refill && refill_wrap
narrow_grant.ready := Bool(true)
val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0)
val entagbits = code.width(tagBits)
val tag_array = SeqMem(nSets, Vec(nWays, Bits(width = entagbits)))
val tag_rdata = tag_array.read(s0_vaddr(untagBits-1,blockOffBits), !refill_done && s0_valid)
when (refill_done) {
val tag = code.encode(refill_tag)
tag_array.write(s1_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _))
}
val vb_array = Reg(init=Bits(0, nSets*nWays))
when (refill_done && !invalidated) {
vb_array := vb_array.bitSet(Cat(repl_way, s1_idx), Bool(true))
}
when (io.invalidate) {
vb_array := Bits(0)
invalidated := Bool(true)
}
val s1_disparity = Wire(Vec(nWays, Bool()))
for (i <- 0 until nWays)
when (s1_valid && s1_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s1_idx), Bool(false)) }
val s1_tag_match = Wire(Vec(nWays, Bool()))
val s1_tag_hit = Wire(Vec(nWays, Bool()))
val s1_dout = Wire(Vec(nWays, Bits(width = rowBits)))
for (i <- 0 until nWays) {
val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_vaddr(untagBits-1,blockOffBits))).toBool
val tag_out = tag_rdata(i)
val s1_tag_disparity = code.decode(tag_out).error
s1_tag_match(i) := tag_out(tagBits-1,0) === s1_tag
s1_tag_hit(i) := s1_vb && s1_tag_match(i)
s1_disparity(i) := s1_vb && (s1_tag_disparity || code.decode(s1_dout(i)).error)
}
s1_any_tag_hit := s1_tag_hit.reduceLeft(_||_) && !s1_disparity.reduceLeft(_||_)
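// An ECC decode error in either the tag or the data row ("disparity")
// clears the line's valid bit and suppresses the hit, so corrupted lines
// are refilled rather than corrected in place.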
for (i <- 0 until nWays) {
val data_array = SeqMem(nSets * refillCycles, Bits(width = code.width(rowBits)))
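// One row per refill beat: the block at set s occupies rows s*refillCycles
// through s*refillCycles + refillCycles - 1, matching the
// (s1_idx << log2Ceil(refillCycles)) | refill_cnt write index below.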
val wen = narrow_grant.valid && repl_way === UInt(i)
when (wen) {
val e_d = code.encode(narrow_grant.bits.data)
data_array.write((s1_idx << log2Ceil(refillCycles)) | refill_cnt, e_d)
}
val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid)
}
// output signals
latency match {
case 1 =>
io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
io.resp.valid := s1_hit
case 2 =>
val s2_hit = RegEnable(s1_hit, !stall)
val s2_tag_hit = RegEnable(s1_tag_hit, !stall)
val s2_dout = RegEnable(s1_dout, !stall)
io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout)
io.resp.valid := s2_hit
}
io.mem.acquire.valid := state === s_request && !io.s2_kill
io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits)
// control state machine
switch (state) {
is (s_ready) {
when (s1_miss) { state := s_request }
invalidated := Bool(false)
}
is (s_request) {
when (io.mem.acquire.ready) { state := s_refill_wait }
when (io.s2_kill) { state := s_ready }
}
is (s_refill_wait) {
when (io.mem.grant.valid) { state := s_refill }
}
is (s_refill) {
when (refill_done) { state := s_ready }
}
}
}

@@ -0,0 +1,314 @@
// See LICENSE for license details
package rocket
import Chisel._
import Instructions._
import uncore.constants.MemoryOpConstants._
import ALU._
import cde.Parameters
import Util._
abstract trait DecodeConstants extends HasCoreParameters
{
val table: Array[(BitPat, List[BitPat])]
}
class IntCtrlSigs extends Bundle {
val legal = Bool()
val fp = Bool()
val rocc = Bool()
val branch = Bool()
val jal = Bool()
val jalr = Bool()
val rxs2 = Bool()
val rxs1 = Bool()
val sel_alu2 = Bits(width = A2_X.getWidth)
val sel_alu1 = Bits(width = A1_X.getWidth)
val sel_imm = Bits(width = IMM_X.getWidth)
val alu_dw = Bool()
val alu_fn = Bits(width = FN_X.getWidth)
val mem = Bool()
val mem_cmd = Bits(width = M_SZ)
val mem_type = Bits(width = MT_SZ)
val rfs1 = Bool()
val rfs2 = Bool()
val rfs3 = Bool()
val wfd = Bool()
val div = Bool()
val wxd = Bool()
val csr = Bits(width = CSR.SZ)
val fence_i = Bool()
val fence = Bool()
val amo = Bool()
def default: List[BitPat] =
// jal renf1 fence.i
// val | jalr | renf2 |
// | fp_val| | renx2 | | renf3 |
// | | rocc| | | renx1 s_alu1 mem_val | | | wfd |
// | | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div |
// | | | | | | | | | | | | | | | | | | | | | wxd | fence
// | | | | | | | | | | | | | | | | | | | | | | csr | | amo
// | | | | | | | | | | | | | | | | | | | | | | | | | |
List(N,X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X)
def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = {
val decoder = DecodeLogic(inst, default, table)
val sigs = Seq(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2,
sel_alu1, sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type,
rfs1, rfs2, rfs3, wfd, div, wxd, csr, fence_i, fence, amo)
sigs zip decoder map {case(s,d) => s := d}
this
}
}
class IDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
BNE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
BEQ-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
BLT-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
BLTU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
BGE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
BGEU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
JAL-> List(Y,N,N,N,Y,N,N,N,A2_SIZE,A1_PC, IMM_UJ,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
JALR-> List(Y,N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
AUIPC-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
LB-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N),
LH-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N),
LW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N),
LBU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N),
LHU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N),
SB-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N),
SH-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N),
SW-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N),
LUI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
ADDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLTI -> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLTIU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
ANDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
ORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
XORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRAI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
ADD-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SUB-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLT-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLTU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
AND-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
OR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
XOR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRA-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
FENCE-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N),
FENCE_I-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FLUSH_ALL,MT_X, N,N,N,N,N,N,CSR.N,Y,N,N),
SCALL-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
SBREAK-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
MRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
WFI-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
CSRRW-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N),
CSRRS-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N),
CSRRC-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N),
CSRRWI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N),
CSRRSI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N),
CSRRCI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N))
}
class SDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
SFENCE_VM-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N))
}
class DebugDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
DRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N))
}
class I64Decode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
LD-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N),
LWU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N),
SD-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N),
ADDIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRAIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
ADDW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SUBW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SLLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
SRAW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N))
}
class MDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
MUL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
MULH-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
MULHU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
MULHSU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
DIV-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
DIVU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
REM-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
REMU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N))
}
class M64Decode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
MULW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
DIVW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
DIVUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
REMW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
REMUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N))
}
class ADecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
AMOADD_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOXOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOSWAP_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOAND_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMIN_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMINU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMAX_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMAXU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
LR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
SC_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y))
}
class A64Decode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
AMOADD_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOSWAP_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOXOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOAND_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMIN_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMINU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMAX_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
AMOMAXU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
LR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
SC_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y))
}
class FDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
FCVT_S_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_D_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N),
FSGNJ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSGNJ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSGNJX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSGNJX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSGNJN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSGNJN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMIN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMIN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMAX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMAX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMUL_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMUL_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FNMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FNMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FNMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FNMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
FCLASS_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCLASS_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FMV_X_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_W_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_W_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_WU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_WU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FEQ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FEQ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FLT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FLT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FLE_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FLE_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
FMV_S_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_S_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_D_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_S_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_D_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FLW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N),
FLD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N),
FSW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N),
FSD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N))
}
class F64Decode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
FMV_X_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_L_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_L_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_LU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FCVT_LU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
FMV_D_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_S_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_D_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_S_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FCVT_D_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
FDIV_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FDIV_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSQRT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
FSQRT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N))
}
class RoCCDecode(implicit val p: Parameters) extends DecodeConstants
{
val table: Array[(BitPat, List[BitPat])] = Array(
CUSTOM0-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM0_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM0_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM0_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM0_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM0_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM1-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM1_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM1_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM1_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM1_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM1_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM2-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM2_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM2_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM2_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM2_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM2_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM3-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM3_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM3_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
CUSTOM3_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM3_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
CUSTOM3_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N))
}


@@ -0,0 +1,383 @@
// See LICENSE for license details.
package rocket
import Chisel._
/* Automatically generated by parse-opcodes */
object Instructions {
def BEQ = BitPat("b?????????????????000?????1100011")
def BNE = BitPat("b?????????????????001?????1100011")
def BLT = BitPat("b?????????????????100?????1100011")
def BGE = BitPat("b?????????????????101?????1100011")
def BLTU = BitPat("b?????????????????110?????1100011")
def BGEU = BitPat("b?????????????????111?????1100011")
def JALR = BitPat("b?????????????????000?????1100111")
def JAL = BitPat("b?????????????????????????1101111")
def LUI = BitPat("b?????????????????????????0110111")
def AUIPC = BitPat("b?????????????????????????0010111")
def ADDI = BitPat("b?????????????????000?????0010011")
def SLLI = BitPat("b000000???????????001?????0010011")
def SLTI = BitPat("b?????????????????010?????0010011")
def SLTIU = BitPat("b?????????????????011?????0010011")
def XORI = BitPat("b?????????????????100?????0010011")
def SRLI = BitPat("b000000???????????101?????0010011")
def SRAI = BitPat("b010000???????????101?????0010011")
def ORI = BitPat("b?????????????????110?????0010011")
def ANDI = BitPat("b?????????????????111?????0010011")
def ADD = BitPat("b0000000??????????000?????0110011")
def SUB = BitPat("b0100000??????????000?????0110011")
def SLL = BitPat("b0000000??????????001?????0110011")
def SLT = BitPat("b0000000??????????010?????0110011")
def SLTU = BitPat("b0000000??????????011?????0110011")
def XOR = BitPat("b0000000??????????100?????0110011")
def SRL = BitPat("b0000000??????????101?????0110011")
def SRA = BitPat("b0100000??????????101?????0110011")
def OR = BitPat("b0000000??????????110?????0110011")
def AND = BitPat("b0000000??????????111?????0110011")
def ADDIW = BitPat("b?????????????????000?????0011011")
def SLLIW = BitPat("b0000000??????????001?????0011011")
def SRLIW = BitPat("b0000000??????????101?????0011011")
def SRAIW = BitPat("b0100000??????????101?????0011011")
def ADDW = BitPat("b0000000??????????000?????0111011")
def SUBW = BitPat("b0100000??????????000?????0111011")
def SLLW = BitPat("b0000000??????????001?????0111011")
def SRLW = BitPat("b0000000??????????101?????0111011")
def SRAW = BitPat("b0100000??????????101?????0111011")
def LB = BitPat("b?????????????????000?????0000011")
def LH = BitPat("b?????????????????001?????0000011")
def LW = BitPat("b?????????????????010?????0000011")
def LD = BitPat("b?????????????????011?????0000011")
def LBU = BitPat("b?????????????????100?????0000011")
def LHU = BitPat("b?????????????????101?????0000011")
def LWU = BitPat("b?????????????????110?????0000011")
def SB = BitPat("b?????????????????000?????0100011")
def SH = BitPat("b?????????????????001?????0100011")
def SW = BitPat("b?????????????????010?????0100011")
def SD = BitPat("b?????????????????011?????0100011")
def FENCE = BitPat("b?????????????????000?????0001111")
def FENCE_I = BitPat("b?????????????????001?????0001111")
def MUL = BitPat("b0000001??????????000?????0110011")
def MULH = BitPat("b0000001??????????001?????0110011")
def MULHSU = BitPat("b0000001??????????010?????0110011")
def MULHU = BitPat("b0000001??????????011?????0110011")
def DIV = BitPat("b0000001??????????100?????0110011")
def DIVU = BitPat("b0000001??????????101?????0110011")
def REM = BitPat("b0000001??????????110?????0110011")
def REMU = BitPat("b0000001??????????111?????0110011")
def MULW = BitPat("b0000001??????????000?????0111011")
def DIVW = BitPat("b0000001??????????100?????0111011")
def DIVUW = BitPat("b0000001??????????101?????0111011")
def REMW = BitPat("b0000001??????????110?????0111011")
def REMUW = BitPat("b0000001??????????111?????0111011")
def AMOADD_W = BitPat("b00000????????????010?????0101111")
def AMOXOR_W = BitPat("b00100????????????010?????0101111")
def AMOOR_W = BitPat("b01000????????????010?????0101111")
def AMOAND_W = BitPat("b01100????????????010?????0101111")
def AMOMIN_W = BitPat("b10000????????????010?????0101111")
def AMOMAX_W = BitPat("b10100????????????010?????0101111")
def AMOMINU_W = BitPat("b11000????????????010?????0101111")
def AMOMAXU_W = BitPat("b11100????????????010?????0101111")
def AMOSWAP_W = BitPat("b00001????????????010?????0101111")
def LR_W = BitPat("b00010??00000?????010?????0101111")
def SC_W = BitPat("b00011????????????010?????0101111")
def AMOADD_D = BitPat("b00000????????????011?????0101111")
def AMOXOR_D = BitPat("b00100????????????011?????0101111")
def AMOOR_D = BitPat("b01000????????????011?????0101111")
def AMOAND_D = BitPat("b01100????????????011?????0101111")
def AMOMIN_D = BitPat("b10000????????????011?????0101111")
def AMOMAX_D = BitPat("b10100????????????011?????0101111")
def AMOMINU_D = BitPat("b11000????????????011?????0101111")
def AMOMAXU_D = BitPat("b11100????????????011?????0101111")
def AMOSWAP_D = BitPat("b00001????????????011?????0101111")
def LR_D = BitPat("b00010??00000?????011?????0101111")
def SC_D = BitPat("b00011????????????011?????0101111")
def ECALL = BitPat("b00000000000000000000000001110011")
def EBREAK = BitPat("b00000000000100000000000001110011")
def URET = BitPat("b00000000001000000000000001110011")
def SRET = BitPat("b00010000001000000000000001110011")
def HRET = BitPat("b00100000001000000000000001110011")
def MRET = BitPat("b00110000001000000000000001110011")
def DRET = BitPat("b01111011001000000000000001110011")
def SFENCE_VM = BitPat("b000100000100?????000000001110011")
def WFI = BitPat("b00010000010100000000000001110011")
def CSRRW = BitPat("b?????????????????001?????1110011")
def CSRRS = BitPat("b?????????????????010?????1110011")
def CSRRC = BitPat("b?????????????????011?????1110011")
def CSRRWI = BitPat("b?????????????????101?????1110011")
def CSRRSI = BitPat("b?????????????????110?????1110011")
def CSRRCI = BitPat("b?????????????????111?????1110011")
def FADD_S = BitPat("b0000000??????????????????1010011")
def FSUB_S = BitPat("b0000100??????????????????1010011")
def FMUL_S = BitPat("b0001000??????????????????1010011")
def FDIV_S = BitPat("b0001100??????????????????1010011")
def FSGNJ_S = BitPat("b0010000??????????000?????1010011")
def FSGNJN_S = BitPat("b0010000??????????001?????1010011")
def FSGNJX_S = BitPat("b0010000??????????010?????1010011")
def FMIN_S = BitPat("b0010100??????????000?????1010011")
def FMAX_S = BitPat("b0010100??????????001?????1010011")
def FSQRT_S = BitPat("b010110000000?????????????1010011")
def FADD_D = BitPat("b0000001??????????????????1010011")
def FSUB_D = BitPat("b0000101??????????????????1010011")
def FMUL_D = BitPat("b0001001??????????????????1010011")
def FDIV_D = BitPat("b0001101??????????????????1010011")
def FSGNJ_D = BitPat("b0010001??????????000?????1010011")
def FSGNJN_D = BitPat("b0010001??????????001?????1010011")
def FSGNJX_D = BitPat("b0010001??????????010?????1010011")
def FMIN_D = BitPat("b0010101??????????000?????1010011")
def FMAX_D = BitPat("b0010101??????????001?????1010011")
def FCVT_S_D = BitPat("b010000000001?????????????1010011")
def FCVT_D_S = BitPat("b010000100000?????????????1010011")
def FSQRT_D = BitPat("b010110100000?????????????1010011")
def FLE_S = BitPat("b1010000??????????000?????1010011")
def FLT_S = BitPat("b1010000??????????001?????1010011")
def FEQ_S = BitPat("b1010000??????????010?????1010011")
def FLE_D = BitPat("b1010001??????????000?????1010011")
def FLT_D = BitPat("b1010001??????????001?????1010011")
def FEQ_D = BitPat("b1010001??????????010?????1010011")
def FCVT_W_S = BitPat("b110000000000?????????????1010011")
def FCVT_WU_S = BitPat("b110000000001?????????????1010011")
def FCVT_L_S = BitPat("b110000000010?????????????1010011")
def FCVT_LU_S = BitPat("b110000000011?????????????1010011")
def FMV_X_S = BitPat("b111000000000?????000?????1010011")
def FCLASS_S = BitPat("b111000000000?????001?????1010011")
def FCVT_W_D = BitPat("b110000100000?????????????1010011")
def FCVT_WU_D = BitPat("b110000100001?????????????1010011")
def FCVT_L_D = BitPat("b110000100010?????????????1010011")
def FCVT_LU_D = BitPat("b110000100011?????????????1010011")
def FMV_X_D = BitPat("b111000100000?????000?????1010011")
def FCLASS_D = BitPat("b111000100000?????001?????1010011")
def FCVT_S_W = BitPat("b110100000000?????????????1010011")
def FCVT_S_WU = BitPat("b110100000001?????????????1010011")
def FCVT_S_L = BitPat("b110100000010?????????????1010011")
def FCVT_S_LU = BitPat("b110100000011?????????????1010011")
def FMV_S_X = BitPat("b111100000000?????000?????1010011")
def FCVT_D_W = BitPat("b110100100000?????????????1010011")
def FCVT_D_WU = BitPat("b110100100001?????????????1010011")
def FCVT_D_L = BitPat("b110100100010?????????????1010011")
def FCVT_D_LU = BitPat("b110100100011?????????????1010011")
def FMV_D_X = BitPat("b111100100000?????000?????1010011")
def FLW = BitPat("b?????????????????010?????0000111")
def FLD = BitPat("b?????????????????011?????0000111")
def FSW = BitPat("b?????????????????010?????0100111")
def FSD = BitPat("b?????????????????011?????0100111")
def FMADD_S = BitPat("b?????00??????????????????1000011")
def FMSUB_S = BitPat("b?????00??????????????????1000111")
def FNMSUB_S = BitPat("b?????00??????????????????1001011")
def FNMADD_S = BitPat("b?????00??????????????????1001111")
def FMADD_D = BitPat("b?????01??????????????????1000011")
def FMSUB_D = BitPat("b?????01??????????????????1000111")
def FNMSUB_D = BitPat("b?????01??????????????????1001011")
def FNMADD_D = BitPat("b?????01??????????????????1001111")
def CUSTOM0 = BitPat("b?????????????????000?????0001011")
def CUSTOM0_RS1 = BitPat("b?????????????????010?????0001011")
def CUSTOM0_RS1_RS2 = BitPat("b?????????????????011?????0001011")
def CUSTOM0_RD = BitPat("b?????????????????100?????0001011")
def CUSTOM0_RD_RS1 = BitPat("b?????????????????110?????0001011")
def CUSTOM0_RD_RS1_RS2 = BitPat("b?????????????????111?????0001011")
def CUSTOM1 = BitPat("b?????????????????000?????0101011")
def CUSTOM1_RS1 = BitPat("b?????????????????010?????0101011")
def CUSTOM1_RS1_RS2 = BitPat("b?????????????????011?????0101011")
def CUSTOM1_RD = BitPat("b?????????????????100?????0101011")
def CUSTOM1_RD_RS1 = BitPat("b?????????????????110?????0101011")
def CUSTOM1_RD_RS1_RS2 = BitPat("b?????????????????111?????0101011")
def CUSTOM2 = BitPat("b?????????????????000?????1011011")
def CUSTOM2_RS1 = BitPat("b?????????????????010?????1011011")
def CUSTOM2_RS1_RS2 = BitPat("b?????????????????011?????1011011")
def CUSTOM2_RD = BitPat("b?????????????????100?????1011011")
def CUSTOM2_RD_RS1 = BitPat("b?????????????????110?????1011011")
def CUSTOM2_RD_RS1_RS2 = BitPat("b?????????????????111?????1011011")
def CUSTOM3 = BitPat("b?????????????????000?????1111011")
def CUSTOM3_RS1 = BitPat("b?????????????????010?????1111011")
def CUSTOM3_RS1_RS2 = BitPat("b?????????????????011?????1111011")
def CUSTOM3_RD = BitPat("b?????????????????100?????1111011")
def CUSTOM3_RD_RS1 = BitPat("b?????????????????110?????1111011")
def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011")
def SLLI_RV32 = BitPat("b0000000??????????001?????0010011")
def SRLI_RV32 = BitPat("b0000000??????????101?????0010011")
def SRAI_RV32 = BitPat("b0100000??????????101?????0010011")
def FRFLAGS = BitPat("b00000000000100000010?????1110011")
def FSFLAGS = BitPat("b000000000001?????001?????1110011")
def FSFLAGSI = BitPat("b000000000001?????101?????1110011")
def FRRM = BitPat("b00000000001000000010?????1110011")
def FSRM = BitPat("b000000000010?????001?????1110011")
def FSRMI = BitPat("b000000000010?????101?????1110011")
def FSCSR = BitPat("b000000000011?????001?????1110011")
def FRCSR = BitPat("b00000000001100000010?????1110011")
def RDCYCLE = BitPat("b11000000000000000010?????1110011")
def RDTIME = BitPat("b11000000000100000010?????1110011")
def RDINSTRET = BitPat("b11000000001000000010?????1110011")
def RDCYCLEH = BitPat("b11001000000000000010?????1110011")
def RDTIMEH = BitPat("b11001000000100000010?????1110011")
def RDINSTRETH = BitPat("b11001000001000000010?????1110011")
def SCALL = BitPat("b00000000000000000000000001110011")
def SBREAK = BitPat("b00000000000100000000000001110011")
}
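// Each pattern above is ternary: '?' bits are don't-cares, so a BitPat
// reduces to a (mask, value) pair and a word matches when
// (word & mask) == value. A minimal plain-Scala sketch of that rule
// (illustrative only, not part of the rocket sources):
object BitPatSketch extends App {
  // Parse a "b..." pattern string, MSB first, into (mask, value).
  def parse(pat: String): (BigInt, BigInt) =
    pat.drop(1).foldLeft((BigInt(0), BigInt(0))) { case ((mask, value), c) => c match {
      case '0' => ((mask << 1) | 1, value << 1)
      case '1' => ((mask << 1) | 1, (value << 1) | 1)
      case '?' => (mask << 1, value << 1)
    }}
  def matches(pat: String, word: BigInt): Boolean = {
    val (mask, value) = parse(pat)
    (word & mask) == value
  }
  // ADDI x1, x0, 1 assembles to 0x00100093 and matches the ADDI pattern above.
  println(matches("b?????????????????000?????0010011", BigInt("00100093", 16))) // true
}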
object Causes {
val misaligned_fetch = 0x0
val fault_fetch = 0x1
val illegal_instruction = 0x2
val breakpoint = 0x3
val misaligned_load = 0x4
val fault_load = 0x5
val misaligned_store = 0x6
val fault_store = 0x7
val user_ecall = 0x8
val supervisor_ecall = 0x9
val hypervisor_ecall = 0xa
val machine_ecall = 0xb
val all = {
val res = collection.mutable.ArrayBuffer[Int]()
res += misaligned_fetch
res += fault_fetch
res += illegal_instruction
res += breakpoint
res += misaligned_load
res += fault_load
res += misaligned_store
res += fault_store
res += user_ecall
res += supervisor_ecall
res += hypervisor_ecall
res += machine_ecall
res.toArray
}
}
object CSRs {
val fflags = 0x1
val frm = 0x2
val fcsr = 0x3
val cycle = 0xc00
val time = 0xc01
val instret = 0xc02
val sstatus = 0x100
val sie = 0x104
val stvec = 0x105
val sscratch = 0x140
val sepc = 0x141
val scause = 0x142
val sbadaddr = 0x143
val sip = 0x144
val sptbr = 0x180
val scycle = 0xd00
val stime = 0xd01
val sinstret = 0xd02
val mstatus = 0x300
val medeleg = 0x302
val mideleg = 0x303
val mie = 0x304
val mtvec = 0x305
val mscratch = 0x340
val mepc = 0x341
val mcause = 0x342
val mbadaddr = 0x343
val mip = 0x344
val mucounteren = 0x310
val mscounteren = 0x311
val mucycle_delta = 0x700
val mutime_delta = 0x701
val muinstret_delta = 0x702
val mscycle_delta = 0x704
val mstime_delta = 0x705
val msinstret_delta = 0x706
val tdrselect = 0x7a0
val tdrdata1 = 0x7a1
val tdrdata2 = 0x7a2
val tdrdata3 = 0x7a3
val dcsr = 0x7b0
val dpc = 0x7b1
val dscratch = 0x7b2
val mcycle = 0xf00
val mtime = 0xf01
val minstret = 0xf02
val misa = 0xf10
val mvendorid = 0xf11
val marchid = 0xf12
val mimpid = 0xf13
val mhartid = 0xf14
val mreset = 0x7c2
val cycleh = 0xc80
val timeh = 0xc81
val instreth = 0xc82
val mucycle_deltah = 0x780
val mutime_deltah = 0x781
val muinstret_deltah = 0x782
val mscycle_deltah = 0x784
val mstime_deltah = 0x785
val msinstret_deltah = 0x786
val mcycleh = 0xf80
val mtimeh = 0xf81
val minstreth = 0xf82
val all = {
val res = collection.mutable.ArrayBuffer[Int]()
res += fflags
res += frm
res += fcsr
res += cycle
res += time
res += instret
res += sstatus
res += sie
res += stvec
res += sscratch
res += sepc
res += scause
res += sbadaddr
res += sip
res += sptbr
res += scycle
res += stime
res += sinstret
res += mstatus
res += medeleg
res += mideleg
res += mie
res += mtvec
res += mscratch
res += mepc
res += mcause
res += mbadaddr
res += mip
res += mucounteren
res += mscounteren
res += mucycle_delta
res += mutime_delta
res += muinstret_delta
res += mscycle_delta
res += mstime_delta
res += msinstret_delta
res += tdrselect
res += tdrdata1
res += tdrdata2
res += tdrdata3
res += dcsr
res += dpc
res += dscratch
res += mcycle
res += mtime
res += minstret
res += misa
res += mvendorid
res += marchid
res += mimpid
res += mhartid
res += mreset
res.toArray
}
val all32 = {
val res = collection.mutable.ArrayBuffer(all:_*)
res += cycleh
res += timeh
res += instreth
res += mucycle_deltah
res += mutime_deltah
res += muinstret_deltah
res += mscycle_deltah
res += mstime_deltah
res += msinstret_deltah
res += mcycleh
res += mtimeh
res += minstreth
res.toArray
}
}
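// The numbering above follows the privileged-spec CSR address convention (a
// property of the ISA, not of this file): bits [11:10] of the address encode
// read/write vs. read-only, and bits [9:8] the lowest privilege level that
// may access the register. A small sketch, illustrative only:
object CsrAddrSketch extends App {
  def readOnly(addr: Int): Boolean = ((addr >> 10) & 0x3) == 0x3
  def minPriv(addr: Int): Int = (addr >> 8) & 0x3 // 0 = user, 1 = supervisor, 3 = machine
  println(s"cycle:   readOnly=${readOnly(0xc00)} minPriv=${minPriv(0xc00)}") // true, 0
  println(s"mstatus: readOnly=${readOnly(0x300)} minPriv=${minPriv(0x300)}") // false, 3
}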


@@ -0,0 +1,154 @@
// See LICENSE for license details.
package rocket
import Chisel._
import ALU._
import Util._
class MultiplierReq(dataBits: Int, tagBits: Int) extends Bundle {
val fn = Bits(width = SZ_ALU_FN)
val dw = Bits(width = SZ_DW)
val in1 = Bits(width = dataBits)
val in2 = Bits(width = dataBits)
val tag = UInt(width = tagBits)
override def cloneType = new MultiplierReq(dataBits, tagBits).asInstanceOf[this.type]
}
class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle {
val data = Bits(width = dataBits)
val tag = UInt(width = tagBits)
override def cloneType = new MultiplierResp(dataBits, tagBits).asInstanceOf[this.type]
}
class MultiplierIO(dataBits: Int, tagBits: Int) extends Bundle {
val req = Decoupled(new MultiplierReq(dataBits, tagBits)).flip
val kill = Bool(INPUT)
val resp = Decoupled(new MultiplierResp(dataBits, tagBits))
}
case class MulDivConfig(
mulUnroll: Int = 1,
mulEarlyOut: Boolean = false,
divEarlyOut: Boolean = false
)
class MulDiv(cfg: MulDivConfig, width: Int, nXpr: Int = 32) extends Module {
val io = new MultiplierIO(width, log2Up(nXpr))
val w = io.req.bits.in1.getWidth
val mulw = (w + cfg.mulUnroll - 1) / cfg.mulUnroll * cfg.mulUnroll
val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6)
val state = Reg(init=s_ready)
val req = Reg(io.req.bits)
val count = Reg(UInt(width = log2Up(w+1)))
val neg_out = Reg(Bool())
val isMul = Reg(Bool())
val isHi = Reg(Bool())
val divisor = Reg(Bits(width = w+1)) // div only needs w bits
val remainder = Reg(Bits(width = 2*mulw+2)) // div only needs 2*w+1 bits
val cmdMul :: cmdHi :: lhsSigned :: rhsSigned :: Nil =
DecodeLogic(io.req.bits.fn, List(X, X, X, X), List(
FN_DIV -> List(N, N, Y, Y),
FN_REM -> List(N, Y, Y, Y),
FN_DIVU -> List(N, N, N, N),
FN_REMU -> List(N, Y, N, N),
FN_MUL -> List(Y, N, X, X),
FN_MULH -> List(Y, Y, Y, Y),
FN_MULHU -> List(Y, Y, N, N),
FN_MULHSU -> List(Y, Y, Y, N))).map(_ toBool)
require(w == 32 || w == 64)
def halfWidth(req: MultiplierReq) = Bool(w > 32) && req.dw === DW_32
def sext(x: Bits, halfW: Bool, signed: Bool) = {
val sign = signed && Mux(halfW, x(w/2-1), x(w-1))
val hi = Mux(halfW, Fill(w/2, sign), x(w-1,w/2))
(Cat(hi, x(w/2-1,0)), sign)
}
val (lhs_in, lhs_sign) = sext(io.req.bits.in1, halfWidth(io.req.bits), lhsSigned)
val (rhs_in, rhs_sign) = sext(io.req.bits.in2, halfWidth(io.req.bits), rhsSigned)
val subtractor = remainder(2*w,w) - divisor(w,0)
val less = subtractor(w)
val negated_remainder = -remainder(w-1,0)
when (state === s_neg_inputs) {
when (remainder(w-1) || isMul) {
remainder := negated_remainder
}
when (divisor(w-1) || isMul) {
divisor := subtractor
}
state := s_busy
}
when (state === s_neg_output) {
remainder := negated_remainder
state := s_done
}
when (state === s_move_rem) {
remainder := remainder(2*w, w+1)
state := Mux(neg_out, s_neg_output, s_done)
}
when (state === s_busy && isMul) {
val mulReg = Cat(remainder(2*mulw+1,w+1),remainder(w-1,0))
val mplier = mulReg(mulw-1,0)
val accum = mulReg(2*mulw,mulw).asSInt
val mpcand = divisor.asSInt
val prod = mplier(cfg.mulUnroll-1, 0) * mpcand + accum
val nextMulReg = Cat(prod, mplier(mulw-1, cfg.mulUnroll))
val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * cfg.mulUnroll)(log2Up(mulw)-1,0))(mulw-1,0)
val eOut = Bool(cfg.mulEarlyOut) && count =/= mulw/cfg.mulUnroll-1 && count =/= 0 &&
!isHi && (mplier & ~eOutMask) === UInt(0)
val eOutRes = (mulReg >> (mulw - count * cfg.mulUnroll)(log2Up(mulw)-1,0))
val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0))
remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0))
count := count + 1
when (eOut || count === mulw/cfg.mulUnroll-1) {
state := Mux(isHi, s_move_rem, s_done)
}
}
when (state === s_busy && !isMul) {
when (count === w) {
state := Mux(isHi, s_move_rem, Mux(neg_out, s_neg_output, s_done))
}
count := count + 1
remainder := Cat(Mux(less, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !less)
val divisorMSB = Log2(divisor(w-1,0), w)
val dividendMSB = Log2(remainder(w-1,0), w)
val eOutPos = UInt(w-1) + divisorMSB - dividendMSB
val eOutZero = divisorMSB > dividendMSB
val eOut = count === 0 && less /* not divby0 */ && (eOutPos > 0 || eOutZero)
when (Bool(cfg.divEarlyOut) && eOut) {
val shift = Mux(eOutZero, UInt(w-1), eOutPos(log2Up(w)-1,0))
remainder := remainder(w-1,0) << shift
count := shift
}
when (count === 0 && !less /* divby0 */ && !isHi) { neg_out := false }
}
when (io.resp.fire() || io.kill) {
state := s_ready
}
when (io.req.fire()) {
state := Mux(lhs_sign || rhs_sign && !cmdMul, s_neg_inputs, s_busy)
isMul := cmdMul
isHi := cmdHi
count := 0
neg_out := !cmdMul && Mux(cmdHi, lhs_sign, lhs_sign =/= rhs_sign)
divisor := Cat(rhs_sign, rhs_in)
remainder := lhs_in
req := io.req.bits
}
io.resp.bits := req
io.resp.bits.data := Mux(halfWidth(req), Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0))
io.resp.valid := state === s_done
io.req.ready := state === s_ready
}
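// In the divide path above, `remainder` holds the partial remainder in its
// upper half and collects quotient bits (!less) in the low bits as it shifts
// left once per cycle: a restoring divider. The same recurrence with the
// register bookkeeping simplified, as a plain-Scala sketch (illustrative
// only; unsigned operands, no early out):
object RestoringDivSketch extends App {
  def divrem(x: BigInt, d: BigInt, w: Int): (BigInt, BigInt) = {
    var rem = BigInt(0); var quo = BigInt(0)
    for (i <- (w - 1) to 0 by -1) {
      rem = (rem << 1) | ((x >> i) & 1)       // shift in the next dividend bit
      val sub = rem - d                        // the `subtractor` above
      val less = sub < 0                       // ... and its sign bit, `less`
      if (!less) rem = sub                     // commit the subtraction; on underflow, "restore" by keeping rem
      quo = (quo << 1) | (if (less) 0 else 1)  // !less becomes the next quotient bit
    }
    (quo, rem)
  }
  println(divrem(BigInt(100), BigInt(7), 32)) // (14, 2)
}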

File diff suppressed because it is too large


@@ -0,0 +1,4 @@
// See LICENSE for license details.
package object rocket extends
rocket.constants.ScalarOpConstants


@@ -0,0 +1,218 @@
// See LICENSE for license details.
package rocket
import Chisel._
import uncore.agents._
import uncore.constants._
import Util._
import uncore.util._
import cde.{Parameters, Field}
class PTWReq(implicit p: Parameters) extends CoreBundle()(p) {
val prv = Bits(width = 2)
val pum = Bool()
val mxr = Bool()
val addr = UInt(width = vpnBits)
val store = Bool()
val fetch = Bool()
}
class PTWResp(implicit p: Parameters) extends CoreBundle()(p) {
val pte = new PTE
}
class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Decoupled(new PTWReq)
val resp = Valid(new PTWResp).flip
val ptbr = new PTBR().asInput
val invalidate = Bool(INPUT)
val status = new MStatus().asInput
}
class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
val ptbr = new PTBR().asInput
val invalidate = Bool(INPUT)
val status = new MStatus().asInput
}
class PTE(implicit p: Parameters) extends CoreBundle()(p) {
val reserved_for_hardware = Bits(width = 16)
val ppn = UInt(width = 38)
val reserved_for_software = Bits(width = 2)
val d = Bool()
val a = Bool()
val g = Bool()
val u = Bool()
val x = Bool()
val w = Bool()
val r = Bool()
val v = Bool()
def table(dummy: Int = 0) = v && !r && !w && !x
def leaf(dummy: Int = 0) = v && (r || (x && !w))
def ur(dummy: Int = 0) = sr() && u
def uw(dummy: Int = 0) = sw() && u
def ux(dummy: Int = 0) = sx() && u
def sr(dummy: Int = 0) = leaf() && r
def sw(dummy: Int = 0) = leaf() && w
def sx(dummy: Int = 0) = leaf() && x
def access_ok(req: PTWReq) = {
val perm_ok = Mux(req.fetch, x, Mux(req.store, w, r || (x && req.mxr)))
val priv_ok = Mux(u, !req.pum, req.prv(0))
leaf() && priv_ok && perm_ok
}
}
class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
val io = new Bundle {
val requestor = Vec(n, new TLBPTWIO).flip
val mem = new HellaCacheIO
val dpath = new DatapathPTWIO
}
require(usingAtomics, "PTW requires atomic memory operations")
val s_ready :: s_req :: s_wait1 :: s_wait2 :: s_set_dirty :: s_wait1_dirty :: s_wait2_dirty :: s_done :: Nil = Enum(UInt(), 8)
val state = Reg(init=s_ready)
val count = Reg(UInt(width = log2Up(pgLevels)))
val s1_kill = Reg(next = Bool(false))
val r_req = Reg(new PTWReq)
val r_req_dest = Reg(Bits())
val r_pte = Reg(new PTE)
val vpn_idxs = (0 until pgLevels).map(i => (r_req.addr >> (pgLevels-i-1)*pgLevelBits)(pgLevelBits-1,0))
val vpn_idx = vpn_idxs(count)
val arb = Module(new RRArbiter(new PTWReq, n))
arb.io.in <> io.requestor.map(_.req)
arb.io.out.ready := state === s_ready
val pte = {
val tmp = new PTE().fromBits(io.mem.resp.bits.data)
val res = Wire(init = new PTE().fromBits(io.mem.resp.bits.data))
res.ppn := tmp.ppn(ppnBits-1, 0)
when ((tmp.ppn >> ppnBits) =/= 0) { res.v := false }
res
}
val pte_addr = Cat(r_pte.ppn, vpn_idx) << log2Ceil(xLen/8)
when (arb.io.out.fire()) {
r_req := arb.io.out.bits
r_req_dest := arb.io.chosen
r_pte.ppn := io.dpath.ptbr.ppn
}
val (pte_cache_hit, pte_cache_data) = {
val size = 1 << log2Up(pgLevels * 2)
val plru = new PseudoLRU(size)
val valid = Reg(init = UInt(0, size))
val tags = Reg(Vec(size, UInt(width = paddrBits)))
val data = Reg(Vec(size, UInt(width = ppnBits)))
val hits = tags.map(_ === pte_addr).asUInt & valid
val hit = hits.orR
when (io.mem.resp.valid && pte.table() && !hit) {
val r = Mux(valid.andR, plru.replace, PriorityEncoder(~valid))
valid := valid | UIntToOH(r)
tags(r) := pte_addr
data(r) := pte.ppn
}
when (hit && state === s_req) { plru.access(OHToUInt(hits)) }
when (io.dpath.invalidate) { valid := 0 }
(hit && count < pgLevels-1, Mux1H(hits, data))
}
val pte_wdata = Wire(init=new PTE().fromBits(0))
pte_wdata.a := true
pte_wdata.d := r_req.store
io.mem.req.valid := state.isOneOf(s_req, s_set_dirty)
io.mem.req.bits.phys := Bool(true)
io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD)
io.mem.req.bits.typ := log2Ceil(xLen/8)
io.mem.req.bits.addr := pte_addr
io.mem.s1_data := pte_wdata.asUInt
io.mem.s1_kill := s1_kill
io.mem.invalidate_lr := Bool(false)
val resp_ppns = (0 until pgLevels-1).map(i => Cat(pte_addr >> (pgIdxBits + pgLevelBits*(pgLevels-i-1)), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ (pte_addr >> pgIdxBits)
for (i <- 0 until io.requestor.size) {
io.requestor(i).resp.valid := state === s_done && (r_req_dest === i)
io.requestor(i).resp.bits.pte := r_pte
io.requestor(i).resp.bits.pte.ppn := resp_ppns(count)
io.requestor(i).ptbr := io.dpath.ptbr
io.requestor(i).invalidate := io.dpath.invalidate
io.requestor(i).status := io.dpath.status
}
// control state machine
switch (state) {
is (s_ready) {
when (arb.io.out.valid) {
state := s_req
}
count := UInt(0)
}
is (s_req) {
when (pte_cache_hit) {
s1_kill := true
state := s_req
count := count + 1
r_pte.ppn := pte_cache_data
}.elsewhen (io.mem.req.ready) {
state := s_wait1
}
}
is (s_wait1) {
state := s_wait2
when (io.mem.xcpt.pf.ld) {
r_pte.v := false
state := s_done
}
}
is (s_wait2) {
when (io.mem.s2_nack) {
state := s_req
}
when (io.mem.resp.valid) {
state := s_done
when (pte.access_ok(r_req) && (!pte.a || (r_req.store && !pte.d))) {
state := s_set_dirty
}.otherwise {
r_pte := pte
}
when (pte.table() && count < pgLevels-1) {
state := s_req
count := count + 1
}
}
}
is (s_set_dirty) {
when (io.mem.req.ready) {
state := s_wait1_dirty
}
}
is (s_wait1_dirty) {
state := s_wait2_dirty
when (io.mem.xcpt.pf.st) {
r_pte.v := false
state := s_done
}
}
is (s_wait2_dirty) {
when (io.mem.s2_nack) {
state := s_set_dirty
}
when (io.mem.resp.valid) {
state := s_req
}
}
is (s_done) {
state := s_ready
}
}
}
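// The walk above slices the VPN into pgLevels chunks (`vpn_idxs`) and forms
// each PTE address as Cat(ppn, vpn_idx) << log2Ceil(xLen/8). A plain-Scala
// sketch of that arithmetic, assuming Sv39-style parameters (3 levels,
// 9 bits per level, 8-byte PTEs); `readPte` stands in for the HellaCache
// access, and the leaf/permission checks are elided (illustrative only):
object PtwAddrSketch extends App {
  val pgLevels = 3; val pgLevelBits = 9; val pteBytes = 8
  def walk(rootPpn: BigInt, vpn: BigInt, readPte: BigInt => BigInt): BigInt = {
    var ppn = rootPpn
    for (count <- 0 until pgLevels) {
      val vpnIdx  = (vpn >> ((pgLevels - count - 1) * pgLevelBits)) & ((1 << pgLevelBits) - 1)
      val pteAddr = ((ppn << pgLevelBits) | vpnIdx) * pteBytes  // Cat(ppn, vpn_idx) << 3
      ppn = readPte(pteAddr)  // next-level ppn from the fetched PTE
    }
    ppn
  }
  println(walk(BigInt(0x1000), BigInt(0x12345), _ => BigInt(0x2000))) // stub PTE reader
}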


@@ -0,0 +1,290 @@
// See LICENSE for license details.
package rocket
import Chisel._
import uncore.tilelink._
import uncore.constants._
import uncore.agents.CacheName
import Util._
import cde.{Parameters, Field}
case object RoccMaxTaggedMemXacts extends Field[Int]
case object RoccNMemChannels extends Field[Int]
case object RoccNPTWPorts extends Field[Int]
class RoCCInstruction extends Bundle
{
val funct = Bits(width = 7)
val rs2 = Bits(width = 5)
val rs1 = Bits(width = 5)
val xd = Bool()
val xs1 = Bool()
val xs2 = Bool()
val rd = Bits(width = 5)
val opcode = Bits(width = 7)
}
class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) {
val inst = new RoCCInstruction
val rs1 = Bits(width = xLen)
val rs2 = Bits(width = xLen)
val status = new MStatus
}
class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) {
val rd = Bits(width = 5)
val data = Bits(width = xLen)
}
class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) {
val cmd = Decoupled(new RoCCCommand).flip
val resp = Decoupled(new RoCCResponse)
val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
val busy = Bool(OUTPUT)
val interrupt = Bool(OUTPUT)
// These should be handled differently, eventually
val autl = new ClientUncachedTileLinkIO
val utl = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO)
val ptw = Vec(p(RoccNPTWPorts), new TLBPTWIO)
val fpu_req = Decoupled(new FPInput)
val fpu_resp = Decoupled(new FPResult).flip
val exception = Bool(INPUT)
override def cloneType = new RoCCInterface().asInstanceOf[this.type]
}
abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) {
val io = new RoCCInterface
io.mem.req.bits.phys := Bool(true) // don't perform address translation
}
class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) {
val regfile = Mem(n, UInt(width = xLen))
val busy = Reg(init = Vec.fill(n){Bool(false)})
val cmd = Queue(io.cmd)
val funct = cmd.bits.inst.funct
val addr = cmd.bits.rs2(log2Up(n)-1,0)
val doWrite = funct === UInt(0)
val doRead = funct === UInt(1)
val doLoad = funct === UInt(2)
val doAccum = funct === UInt(3)
val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0)
// datapath
val addend = cmd.bits.rs1
val accum = regfile(addr)
val wdata = Mux(doWrite, addend, accum + addend)
when (cmd.fire() && (doWrite || doAccum)) {
regfile(addr) := wdata
}
when (io.mem.resp.valid) {
regfile(memRespTag) := io.mem.resp.bits.data
busy(memRespTag) := Bool(false)
}
// control
when (io.mem.req.fire()) {
busy(addr) := Bool(true)
}
val doResp = cmd.bits.inst.xd
val stallReg = busy(addr)
val stallLoad = doLoad && !io.mem.req.ready
val stallResp = doResp && !io.resp.ready
cmd.ready := !stallReg && !stallLoad && !stallResp
// command resolved if no stalls AND not issuing a load that will need a request
// PROC RESPONSE INTERFACE
io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad
// valid response if the command is valid, needs a response, and is not stalled
io.resp.bits.rd := cmd.bits.inst.rd
// Must respond with the appropriate tag, or undefined behavior results
io.resp.bits.data := accum
// The semantics are to always send out the prior accumulator register value
io.busy := cmd.valid || busy.reduce(_||_)
// Be busy when we have pending memory requests or have committed to the possibility of pending requests
io.interrupt := Bool(false)
// Set this true to trigger an interrupt on the processor (please refer to supervisor documentation)
// MEMORY REQUEST INTERFACE
io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp
io.mem.req.bits.addr := addend
io.mem.req.bits.tag := addr
io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
io.mem.req.bits.data := Bits(0) // we're not performing any stores...
io.mem.invalidate_lr := false
io.autl.acquire.valid := false
io.autl.grant.ready := false
}
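// The RoCCInstruction bundle above implies this MSB-first word layout:
// funct[31:25], rs2[24:20], rs1[19:15], xd[14], xs1[13], xs2[12], rd[11:7],
// opcode[6:0]. A plain-Scala encoder sketch (illustrative only) for driving
// the accumulator through the custom0 opcode:
object RoccEncodeSketch extends App {
  def encode(funct: Int, rd: Int, rs1: Int, rs2: Int,
             xd: Boolean, xs1: Boolean, xs2: Boolean, opcode: Int): Long =
    (funct.toLong << 25) | (rs2.toLong << 20) | (rs1.toLong << 15) |
      ((if (xd) 1L else 0L) << 14) | ((if (xs1) 1L else 0L) << 13) |
      ((if (xs2) 1L else 0L) << 12) | (rd.toLong << 7) | opcode.toLong
  // funct = 3 is doAccum above: the value in rs1 is added into the
  // accumulator register selected by the value in rs2, with rd written back.
  val custom0 = Integer.parseInt("0001011", 2)
  println(f"${encode(3, 5, 10, 11, xd = true, xs1 = true, xs2 = true, custom0)}%08x")
}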
class TranslatorExample(implicit p: Parameters) extends RoCC()(p) {
val req_addr = Reg(UInt(width = coreMaxAddrBits))
val req_rd = Reg(io.resp.bits.rd)
val req_offset = req_addr(pgIdxBits - 1, 0)
val req_vpn = req_addr(coreMaxAddrBits - 1, pgIdxBits)
val pte = Reg(new PTE)
val s_idle :: s_ptw_req :: s_ptw_resp :: s_resp :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_idle)
io.cmd.ready := (state === s_idle)
when (io.cmd.fire()) {
req_rd := io.cmd.bits.inst.rd
req_addr := io.cmd.bits.rs1
state := s_ptw_req
}
private val ptw = io.ptw(0)
when (ptw.req.fire()) { state := s_ptw_resp }
when (state === s_ptw_resp && ptw.resp.valid) {
pte := ptw.resp.bits.pte
state := s_resp
}
when (io.resp.fire()) { state := s_idle }
ptw.req.valid := (state === s_ptw_req)
ptw.req.bits.addr := req_vpn
ptw.req.bits.store := Bool(false)
ptw.req.bits.fetch := Bool(false)
io.resp.valid := (state === s_resp)
io.resp.bits.rd := req_rd
io.resp.bits.data := Mux(pte.leaf(), Cat(pte.ppn, req_offset), SInt(-1, xLen).asUInt)
io.busy := (state =/= s_idle)
io.interrupt := Bool(false)
io.mem.req.valid := Bool(false)
io.mem.invalidate_lr := Bool(false)
io.autl.acquire.valid := Bool(false)
io.autl.grant.ready := Bool(false)
}
class CharacterCountExample(implicit p: Parameters) extends RoCC()(p)
with HasTileLinkParameters {
private val blockOffset = tlBeatAddrBits + tlByteAddrBits
val needle = Reg(UInt(width = 8))
val addr = Reg(UInt(width = coreMaxAddrBits))
val count = Reg(UInt(width = xLen))
val resp_rd = Reg(io.resp.bits.rd)
val addr_block = addr(coreMaxAddrBits - 1, blockOffset)
val offset = addr(blockOffset - 1, 0)
val next_addr = (addr_block + UInt(1)) << UInt(blockOffset)
val s_idle :: s_acq :: s_gnt :: s_check :: s_resp :: Nil = Enum(Bits(), 5)
val state = Reg(init = s_idle)
val gnt = io.autl.grant.bits
val recv_data = Reg(UInt(width = tlDataBits))
val recv_beat = Reg(UInt(width = tlBeatAddrBits))
val data_bytes = Vec.tabulate(tlDataBytes) { i => recv_data(8 * (i + 1) - 1, 8 * i) }
val zero_match = data_bytes.map(_ === UInt(0))
val needle_match = data_bytes.map(_ === needle)
val first_zero = PriorityEncoder(zero_match)
val chars_found = PopCount(needle_match.zipWithIndex.map {
case (matches, i) =>
val idx = Cat(recv_beat, UInt(i, tlByteAddrBits))
matches && idx >= offset && UInt(i) <= first_zero
})
val zero_found = zero_match.reduce(_ || _)
val finished = Reg(Bool())
io.cmd.ready := (state === s_idle)
io.resp.valid := (state === s_resp)
io.resp.bits.rd := resp_rd
io.resp.bits.data := count
io.autl.acquire.valid := (state === s_acq)
io.autl.acquire.bits := GetBlock(addr_block = addr_block)
io.autl.grant.ready := (state === s_gnt)
when (io.cmd.fire()) {
addr := io.cmd.bits.rs1
needle := io.cmd.bits.rs2
resp_rd := io.cmd.bits.inst.rd
count := UInt(0)
finished := Bool(false)
state := s_acq
}
when (io.autl.acquire.fire()) { state := s_gnt }
when (io.autl.grant.fire()) {
recv_beat := gnt.addr_beat
recv_data := gnt.data
state := s_check
}
when (state === s_check) {
when (!finished) {
count := count + chars_found
}
when (zero_found) { finished := Bool(true) }
when (recv_beat === UInt(tlDataBeats - 1)) {
addr := next_addr
state := Mux(zero_found || finished, s_resp, s_acq)
} .otherwise {
state := s_gnt
}
}
when (io.resp.fire()) { state := s_idle }
io.busy := (state =/= s_idle)
io.interrupt := Bool(false)
io.mem.req.valid := Bool(false)
io.mem.invalidate_lr := Bool(false)
}
class OpcodeSet(val opcodes: Seq[UInt]) {
def |(set: OpcodeSet) =
new OpcodeSet(this.opcodes ++ set.opcodes)
def matches(oc: UInt) = opcodes.map(_ === oc).reduce(_ || _)
}
object OpcodeSet {
val custom0 = new OpcodeSet(Seq(Bits("b0001011")))
val custom1 = new OpcodeSet(Seq(Bits("b0101011")))
val custom2 = new OpcodeSet(Seq(Bits("b1011011")))
val custom3 = new OpcodeSet(Seq(Bits("b1111011")))
val all = custom0 | custom1 | custom2 | custom3
}
class RoccCommandRouter(opcodes: Seq[OpcodeSet])(implicit p: Parameters)
extends CoreModule()(p) {
val io = new Bundle {
val in = Decoupled(new RoCCCommand).flip
val out = Vec(opcodes.size, Decoupled(new RoCCCommand))
val busy = Bool(OUTPUT)
}
val cmd = Queue(io.in)
val cmdReadys = io.out.zip(opcodes).map { case (out, opcode) =>
val me = opcode.matches(cmd.bits.inst.opcode)
out.valid := cmd.valid && me
out.bits := cmd.bits
out.ready && me
}
cmd.ready := cmdReadys.reduce(_ || _)
io.busy := cmd.valid
assert(PopCount(cmdReadys) <= UInt(1),
"Custom opcode matched for more than one accelerator")
}
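// Dispatch rule implemented above: a command goes to the single output whose
// OpcodeSet contains the instruction's 7-bit opcode field; overlapping sets
// would trip the assertion. A plain-Scala sketch (illustrative only):
object RouteSketch extends App {
  val custom0 = Set(Integer.parseInt("0001011", 2))
  val custom1 = Set(Integer.parseInt("0101011", 2))
  def route(inst: Long, sets: Seq[Set[Int]]): Option[Int] =
    sets.zipWithIndex.collect { case (s, i) if s.contains((inst & 0x7f).toInt) => i } match {
      case Seq(i) => Some(i)
      case Seq()  => None  // no accelerator claims this opcode
      case _      => sys.error("opcode matched more than one accelerator")
    }
  println(route(0x0000000bL, Seq(custom0, custom1))) // Some(0): custom0
}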


@@ -0,0 +1,702 @@
// See LICENSE for license details.
package rocket
import Chisel._
import junctions._
import uncore.devices._
import uncore.agents.CacheName
import uncore.constants._
import Util._
import cde.{Parameters, Field}
case object XLen extends Field[Int]
case object FetchWidth extends Field[Int]
case object RetireWidth extends Field[Int]
case object FPUKey extends Field[Option[FPUConfig]]
case object MulDivKey extends Field[Option[MulDivConfig]]
case object UseVM extends Field[Boolean]
case object UseUser extends Field[Boolean]
case object UseDebug extends Field[Boolean]
case object UseAtomics extends Field[Boolean]
case object UseCompressed extends Field[Boolean]
case object FastLoadWord extends Field[Boolean]
case object FastLoadByte extends Field[Boolean]
case object CoreInstBits extends Field[Int]
case object NCustomMRWCSRs extends Field[Int]
case object MtvecWritable extends Field[Boolean]
case object MtvecInit extends Field[BigInt]
case object ResetVector extends Field[BigInt]
case object NBreakpoints extends Field[Int]
trait HasCoreParameters extends HasAddrMapParameters {
implicit val p: Parameters
val xLen = p(XLen)
val usingVM = p(UseVM)
val usingUser = p(UseUser) || usingVM
val usingDebug = p(UseDebug)
val usingMulDiv = p(MulDivKey).nonEmpty
val usingFPU = p(FPUKey).nonEmpty
val usingAtomics = p(UseAtomics)
val usingCompressed = p(UseCompressed)
val usingRoCC = !p(BuildRoCC).isEmpty
val fastLoadWord = p(FastLoadWord)
val fastLoadByte = p(FastLoadByte)
val retireWidth = p(RetireWidth)
val fetchWidth = p(FetchWidth)
val coreInstBits = p(CoreInstBits)
val coreInstBytes = coreInstBits/8
val coreDataBits = xLen
val coreDataBytes = coreDataBits/8
val dcacheArbPorts = 1 + (if (usingVM) 1 else 0) + p(BuildRoCC).size
val coreDCacheReqTagBits = 6
val dcacheReqTagBits = coreDCacheReqTagBits + log2Ceil(dcacheArbPorts)
def pgIdxBits = 12
def pgLevelBits = 10 - log2Ceil(xLen / 32)
def vaddrBits = pgIdxBits + pgLevels * pgLevelBits
def ppnBits = paddrBits - pgIdxBits
def vpnBits = vaddrBits - pgIdxBits
val pgLevels = p(PgLevels)
val asIdBits = p(ASIdBits)
val vpnBitsExtended = vpnBits + (vaddrBits < xLen).toInt
val vaddrBitsExtended = vpnBitsExtended + pgIdxBits
val coreMaxAddrBits = paddrBits max vaddrBitsExtended
val nCustomMrwCsrs = p(NCustomMRWCSRs)
val nCores = p(NTiles)
// fetchWidth doubled, but coreInstBytes halved, for RVC
val decodeWidth = fetchWidth / (if (usingCompressed) 2 else 1)
// Print out log of committed instructions and their writeback values.
// Requires post-processing due to out-of-order writebacks.
val enableCommitLog = false
val maxPAddrBits = xLen match {
case 32 => 34
case 64 => 50
}
require(paddrBits <= maxPAddrBits)
require(!fastLoadByte || fastLoadWord)
}
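// Worked instance of the paging geometry these definitions imply: with
// xLen = 64 and pgLevels = 3, pgLevelBits = 10 - log2Ceil(64/32) = 9 and
// vaddrBits = 12 + 3*9 = 39 (Sv39); with xLen = 32 and pgLevels = 2,
// pgLevelBits = 10 and vaddrBits = 12 + 2*10 = 32 (Sv32).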
abstract class CoreModule(implicit val p: Parameters) extends Module
with HasCoreParameters
abstract class CoreBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasCoreParameters
class RegFile(n: Int, w: Int, zero: Boolean = false) {
private val rf = Mem(n, UInt(width = w))
private def access(addr: UInt) = rf(~addr(log2Up(n)-1,0))
private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]()
private var canRead = true
def read(addr: UInt) = {
require(canRead)
reads += addr -> Wire(UInt())
reads.last._2 := Mux(Bool(zero) && addr === UInt(0), UInt(0), access(addr))
reads.last._2
}
def write(addr: UInt, data: UInt) = {
canRead = false
when (addr =/= UInt(0)) {
access(addr) := data
for ((raddr, rdata) <- reads)
when (addr === raddr) { rdata := data }
}
}
}
object ImmGen {
def apply(sel: UInt, inst: UInt) = {
val sign = Mux(sel === IMM_Z, SInt(0), inst(31).asSInt)
val b30_20 = Mux(sel === IMM_U, inst(30,20).asSInt, sign)
val b19_12 = Mux(sel =/= IMM_U && sel =/= IMM_UJ, sign, inst(19,12).asSInt)
val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0),
Mux(sel === IMM_UJ, inst(20).asSInt,
Mux(sel === IMM_SB, inst(7).asSInt, sign)))
val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25))
val b4_1 = Mux(sel === IMM_U, Bits(0),
Mux(sel === IMM_S || sel === IMM_SB, inst(11,8),
Mux(sel === IMM_Z, inst(19,16), inst(24,21))))
val b0 = Mux(sel === IMM_S, inst(7),
Mux(sel === IMM_I, inst(20),
Mux(sel === IMM_Z, inst(15), Bits(0))))
Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).asSInt
}
}
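// ImmGen reassembles the immediate bits that the encodings scatter across
// the instruction word. A plain-Scala check of two of the formats (I and S),
// following the standard RISC-V encodings (illustrative only):
object ImmSketch extends App {
  def bits(x: Long, hi: Int, lo: Int): Long = (x >> lo) & ((1L << (hi - lo + 1)) - 1)
  def sext(x: Long, w: Int): Long = (x << (64 - w)) >> (64 - w)  // arithmetic shift
  def immI(inst: Long): Long = sext(bits(inst, 31, 20), 12)
  def immS(inst: Long): Long = sext((bits(inst, 31, 25) << 5) | bits(inst, 11, 7), 12)
  println(immI(0xfff00093L)) // ADDI x1, x0, -1 -> -1
  println(immS(0xfe112e23L)) // SW x1, -4(x2)   -> -4
}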
class Rocket(implicit p: Parameters) extends CoreModule()(p) {
val io = new Bundle {
val prci = new PRCITileIO().flip
val imem = new FrontendIO()(p.alterPartial({case CacheName => "L1I" }))
val dmem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
val ptw = new DatapathPTWIO().flip
val fpu = new FPUIO().flip
val rocc = new RoCCInterface().flip
}
val decode_table = {
(if (usingMulDiv) new MDecode +: (if (xLen > 32) Seq(new M64Decode) else Nil) else Nil) ++:
(if (usingAtomics) new ADecode +: (if (xLen > 32) Seq(new A64Decode) else Nil) else Nil) ++:
(if (usingFPU) new FDecode +: (if (xLen > 32) Seq(new F64Decode) else Nil) else Nil) ++:
(if (usingRoCC) Some(new RoCCDecode) else None) ++:
(if (xLen > 32) Some(new I64Decode) else None) ++:
(if (usingVM) Some(new SDecode) else None) ++:
(if (usingDebug) Some(new DebugDecode) else None) ++:
Seq(new IDecode)
} flatMap(_.table)
val ex_ctrl = Reg(new IntCtrlSigs)
val mem_ctrl = Reg(new IntCtrlSigs)
val wb_ctrl = Reg(new IntCtrlSigs)
val ex_reg_xcpt_interrupt = Reg(Bool())
val ex_reg_valid = Reg(Bool())
val ex_reg_rvc = Reg(Bool())
val ex_reg_btb_hit = Reg(Bool())
val ex_reg_btb_resp = Reg(new BTBResp)
val ex_reg_xcpt = Reg(Bool())
val ex_reg_flush_pipe = Reg(Bool())
val ex_reg_load_use = Reg(Bool())
val ex_reg_cause = Reg(UInt())
val ex_reg_replay = Reg(Bool())
val ex_reg_pc = Reg(UInt())
val ex_reg_inst = Reg(Bits())
val mem_reg_xcpt_interrupt = Reg(Bool())
val mem_reg_valid = Reg(Bool())
val mem_reg_rvc = Reg(Bool())
val mem_reg_btb_hit = Reg(Bool())
val mem_reg_btb_resp = Reg(new BTBResp)
val mem_reg_xcpt = Reg(Bool())
val mem_reg_replay = Reg(Bool())
val mem_reg_flush_pipe = Reg(Bool())
val mem_reg_cause = Reg(UInt())
val mem_reg_slow_bypass = Reg(Bool())
val mem_reg_load = Reg(Bool())
val mem_reg_store = Reg(Bool())
val mem_reg_pc = Reg(UInt())
val mem_reg_inst = Reg(Bits())
val mem_reg_wdata = Reg(Bits())
val mem_reg_rs2 = Reg(Bits())
val take_pc_mem = Wire(Bool())
val wb_reg_valid = Reg(Bool())
val wb_reg_xcpt = Reg(Bool())
val wb_reg_replay = Reg(Bool())
val wb_reg_cause = Reg(UInt())
val wb_reg_pc = Reg(UInt())
val wb_reg_inst = Reg(Bits())
val wb_reg_wdata = Reg(Bits())
val wb_reg_rs2 = Reg(Bits())
val take_pc_wb = Wire(Bool())
val take_pc_mem_wb = take_pc_wb || take_pc_mem
val take_pc = take_pc_mem_wb
// decode stage
val ibuf = Module(new IBuf)
val id_expanded_inst = ibuf.io.inst.map(_.bits.inst)
val id_inst = id_expanded_inst.map(_.bits)
ibuf.io.imem <> io.imem.resp
ibuf.io.kill := take_pc
require(decodeWidth == 1 /* TODO */ && retireWidth == decodeWidth)
val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst(0), decode_table)
val id_raddr3 = id_expanded_inst(0).rs3
val id_raddr2 = id_expanded_inst(0).rs2
val id_raddr1 = id_expanded_inst(0).rs1
val id_waddr = id_expanded_inst(0).rd
val id_load_use = Wire(Bool())
val id_reg_fence = Reg(init=Bool(false))
val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2)
val id_raddr = IndexedSeq(id_raddr1, id_raddr2)
val rf = new RegFile(31, xLen)
val id_rs = id_raddr.map(rf.read _)
val ctrl_killd = Wire(Bool())
val csr = Module(new CSRFile)
val id_csr_en = id_ctrl.csr =/= CSR.N
val id_system_insn = id_ctrl.csr === CSR.I
val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0)
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
val id_csr_addr = id_inst(0)(31,20)
// this is overly conservative
val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil
val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*)
val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs.map(UInt(_)), (legal_csrs -- safe_csrs).toList.map(UInt(_))))
val id_illegal_insn = !id_ctrl.legal ||
id_ctrl.fp && !csr.io.status.fs.orR ||
id_ctrl.rocc && !csr.io.status.xs.orR
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
val id_amo_aq = id_inst(0)(26)
val id_amo_rl = id_inst(0)(25)
val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
val id_rocc_busy = Bool(usingRoCC) &&
(io.rocc.busy || ex_reg_valid && ex_ctrl.rocc ||
mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc)
id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy
val id_do_fence = id_rocc_busy && id_ctrl.fence ||
id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en)
val bpu = Module(new BreakpointUnit)
bpu.io.status := csr.io.status
bpu.io.bp := csr.io.bp
bpu.io.pc := ibuf.io.pc
bpu.io.ea := mem_reg_wdata
val id_xcpt_if = ibuf.io.inst(0).bits.pf0 || ibuf.io.inst(0).bits.pf1
val (id_xcpt, id_cause) = checkExceptions(List(
(csr.io.interrupt, csr.io.interrupt_cause),
(bpu.io.xcpt_if, UInt(Causes.breakpoint)),
(id_xcpt_if, UInt(Causes.fault_fetch)),
(id_illegal_insn, UInt(Causes.illegal_instruction))))
val dcache_bypass_data =
if (fastLoadByte) io.dmem.resp.bits.data
else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass
else wb_reg_wdata
// detect bypass opportunities
val ex_waddr = ex_reg_inst(11,7)
val mem_waddr = mem_reg_inst(11,7)
val wb_waddr = wb_reg_inst(11,7)
val bypass_sources = IndexedSeq(
(Bool(true), UInt(0), UInt(0)), // treat reading x0 as a bypass
(ex_reg_valid && ex_ctrl.wxd, ex_waddr, mem_reg_wdata),
(mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, mem_waddr, wb_reg_wdata),
(mem_reg_valid && mem_ctrl.wxd, mem_waddr, dcache_bypass_data))
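// listed highest-priority first: x0, then EX (the youngest value), then MEM;
// the PriorityEncoder below picks the first matching source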
val id_bypass_src = id_raddr.map(raddr => bypass_sources.map(s => s._1 && s._2 === raddr))
// execute stage
val bypass_mux = Vec(bypass_sources.map(_._3))
val ex_reg_rs_bypass = Reg(Vec(id_raddr.size, Bool()))
val ex_reg_rs_lsb = Reg(Vec(id_raddr.size, UInt()))
val ex_reg_rs_msb = Reg(Vec(id_raddr.size, UInt()))
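// when bypassing, ex_reg_rs_lsb holds the bypass_mux select instead of data;
// otherwise the register-file value is split across the lsb/msb pair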
val ex_rs = for (i <- 0 until id_raddr.size)
yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
val ex_imm = ImmGen(ex_ctrl.sel_imm, ex_reg_inst)
val ex_op1 = MuxLookup(ex_ctrl.sel_alu1, SInt(0), Seq(
A1_RS1 -> ex_rs(0).asSInt,
A1_PC -> ex_reg_pc.asSInt))
val ex_op2 = MuxLookup(ex_ctrl.sel_alu2, SInt(0), Seq(
A2_RS2 -> ex_rs(1).asSInt,
A2_IMM -> ex_imm,
A2_SIZE -> Mux(ex_reg_rvc, SInt(2), SInt(4))))
val alu = Module(new ALU)
alu.io.dw := ex_ctrl.alu_dw
alu.io.fn := ex_ctrl.alu_fn
alu.io.in2 := ex_op2.asUInt
alu.io.in1 := ex_op1.asUInt
// multiplier and divider
val div = Module(new MulDiv(p(MulDivKey).getOrElse(MulDivConfig()), width = xLen))
div.io.req.valid := ex_reg_valid && ex_ctrl.div
div.io.req.bits.dw := ex_ctrl.alu_dw
div.io.req.bits.fn := ex_ctrl.alu_fn
div.io.req.bits.in1 := ex_rs(0)
div.io.req.bits.in2 := ex_rs(1)
div.io.req.bits.tag := ex_waddr
ex_reg_valid := !ctrl_killd
ex_reg_replay := !take_pc && ibuf.io.inst(0).valid && ibuf.io.inst(0).bits.replay
ex_reg_xcpt := !ctrl_killd && id_xcpt
ex_reg_xcpt_interrupt := !take_pc && ibuf.io.inst(0).valid && csr.io.interrupt
when (id_xcpt) { ex_reg_cause := id_cause }
ex_reg_btb_hit := ibuf.io.inst(0).bits.btb_hit
when (ibuf.io.inst(0).bits.btb_hit) { ex_reg_btb_resp := ibuf.io.btb_resp }
when (!ctrl_killd) {
ex_ctrl := id_ctrl
ex_reg_rvc := ibuf.io.inst(0).bits.rvc
ex_ctrl.csr := id_csr
when (id_xcpt) { // pass PC down ALU writeback pipeline for badaddr
ex_ctrl.alu_fn := ALU.FN_ADD
ex_ctrl.alu_dw := DW_XPR
ex_ctrl.sel_alu1 := A1_PC
ex_ctrl.sel_alu2 := A2_ZERO
when (!bpu.io.xcpt_if && !ibuf.io.inst(0).bits.pf0 && ibuf.io.inst(0).bits.pf1) { // PC+2
ex_ctrl.sel_alu2 := A2_SIZE
ex_reg_rvc := true
}
}
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep
ex_reg_load_use := id_load_use
when (id_ctrl.jalr && csr.io.status.debug) {
ex_reg_flush_pipe := true
ex_ctrl.fence_i := true
}
for (i <- 0 until id_raddr.size) {
val do_bypass = id_bypass_src(i).reduce(_||_)
val bypass_src = PriorityEncoder(id_bypass_src(i))
ex_reg_rs_bypass(i) := do_bypass
ex_reg_rs_lsb(i) := bypass_src
when (id_ren(i) && !do_bypass) {
ex_reg_rs_lsb(i) := id_rs(i)(bypass_src.getWidth-1,0)
ex_reg_rs_msb(i) := id_rs(i) >> bypass_src.getWidth
}
}
}
when (!ctrl_killd || csr.io.interrupt || ibuf.io.inst(0).bits.replay) {
ex_reg_inst := id_inst(0)
ex_reg_pc := ibuf.io.pc
}
// replay inst in ex stage?
val ex_pc_valid = ex_reg_valid || ex_reg_replay || ex_reg_xcpt_interrupt
val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
ex_ctrl.div && !div.io.req.ready
val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
val replay_ex = ex_reg_replay || (ex_reg_valid && (replay_ex_structural || replay_ex_load_use))
val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid
// detect 2-cycle load-use delay for LB/LH/SC
val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
val (ex_xcpt, ex_cause) = checkExceptions(List(
(ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause),
(ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction))))
// memory stage
val mem_br_taken = mem_reg_wdata(0)
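// branch/jump target, or the next sequential PC (+2/+4) for the not-taken case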
val mem_br_target = mem_reg_pc.asSInt +
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst),
Mux(mem_reg_rvc, SInt(2), SInt(4))))
val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt
val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(ibuf.io.inst(0).valid, mem_npc =/= ibuf.io.pc, Bool(true)))
val mem_npc_misaligned = if (usingCompressed) Bool(false) else mem_npc(1)
val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal
val mem_misprediction =
if (p(BtbKey).nEntries == 0) mem_cfi_taken
else mem_wrong_npc
take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
mem_reg_valid := !ctrl_killx
mem_reg_replay := !take_pc_mem_wb && replay_ex
mem_reg_xcpt := !ctrl_killx && ex_xcpt
mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
when (ex_xcpt) { mem_reg_cause := ex_cause }
when (ex_pc_valid) {
mem_ctrl := ex_ctrl
mem_reg_rvc := ex_reg_rvc
mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)
mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
mem_reg_btb_hit := ex_reg_btb_hit
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
mem_reg_flush_pipe := ex_reg_flush_pipe
mem_reg_slow_bypass := ex_slow_bypass
mem_reg_inst := ex_reg_inst
mem_reg_pc := ex_reg_pc
mem_reg_wdata := alu.io.out
when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc)) {
mem_reg_rs2 := ex_rs(1)
}
}
val mem_breakpoint = (mem_reg_load && bpu.io.xcpt_ld) || (mem_reg_store && bpu.io.xcpt_st)
val (mem_new_xcpt, mem_new_cause) = checkExceptions(List(
(mem_breakpoint, UInt(Causes.breakpoint)),
(mem_npc_misaligned, UInt(Causes.misaligned_fetch)),
(mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)),
(mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)),
(mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)),
(mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load))))
val (mem_xcpt, mem_cause) = checkExceptions(List(
(mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
(mem_reg_valid && mem_new_xcpt, mem_new_cause)))
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
div.io.kill := killm_common && Reg(next = div.io.req.fire())
val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem
// writeback stage
wb_reg_valid := !ctrl_killm
wb_reg_replay := replay_mem && !take_pc_wb
wb_reg_xcpt := mem_xcpt && !take_pc_wb
when (mem_xcpt) { wb_reg_cause := mem_cause }
when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) {
wb_ctrl := mem_ctrl
wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
when (mem_ctrl.rocc) {
wb_reg_rs2 := mem_reg_rs2
}
wb_reg_inst := mem_reg_inst
wb_reg_pc := mem_reg_pc
}
val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
val replay_wb_common = io.dmem.s2_nack || wb_reg_replay
val replay_wb_rocc = wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
val replay_wb = replay_wb_common || replay_wb_rocc
val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt
take_pc_wb := replay_wb || wb_xcpt || csr.io.eret
// writeback arbitration
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
val dmem_resp_waddr = io.dmem.resp.bits.tag(5, 1)
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
val dmem_resp_replay = dmem_resp_valid && io.dmem.resp.bits.replay
div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
val ll_wdata = Wire(init = div.io.resp.bits.data)
val ll_waddr = Wire(init = div.io.resp.bits.tag)
val ll_wen = Wire(init = div.io.resp.fire())
if (usingRoCC) {
io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
when (io.rocc.resp.fire()) {
div.io.resp.ready := Bool(false)
ll_wdata := io.rocc.resp.bits.data
ll_waddr := io.rocc.resp.bits.rd
ll_wen := Bool(true)
}
}
when (dmem_resp_replay && dmem_resp_xpu) {
div.io.resp.ready := Bool(false)
if (usingRoCC)
io.rocc.resp.ready := Bool(false)
ll_waddr := dmem_resp_waddr
ll_wen := Bool(true)
}
val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt
val wb_wen = wb_valid && wb_ctrl.wxd
val rf_wen = wb_wen || ll_wen
val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data,
Mux(ll_wen, ll_wdata,
Mux(wb_ctrl.csr =/= CSR.N, csr.io.rw.rdata,
wb_reg_wdata)))
when (rf_wen) { rf.write(rf_waddr, rf_wdata) }
// hook up control/status regfile
csr.io.exception := wb_reg_xcpt
csr.io.cause := wb_reg_cause
csr.io.retire := wb_valid
csr.io.prci <> io.prci
io.fpu.fcsr_rm := csr.io.fcsr_rm
csr.io.fcsr_flags := io.fpu.fcsr_flags
csr.io.rocc.interrupt <> io.rocc.interrupt
csr.io.pc := wb_reg_pc
csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata)
io.ptw.ptbr := csr.io.ptbr
io.ptw.invalidate := csr.io.fatc
io.ptw.status := csr.io.status
csr.io.rw.addr := wb_reg_inst(31,20)
csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
csr.io.rw.wdata := wb_reg_wdata
val hazard_targets = Seq((id_ctrl.rxs1 && id_raddr1 =/= UInt(0), id_raddr1),
(id_ctrl.rxs2 && id_raddr2 =/= UInt(0), id_raddr2),
(id_ctrl.wxd && id_waddr =/= UInt(0), id_waddr))
val fp_hazard_targets = Seq((io.fpu.dec.ren1, id_raddr1),
(io.fpu.dec.ren2, id_raddr2),
(io.fpu.dec.ren3, id_raddr3),
(io.fpu.dec.wen, id_waddr))
val sboard = new Scoreboard(32, true)
sboard.clear(ll_wen, ll_waddr)
val id_sboard_hazard = checkHazards(hazard_targets, sboard.read _)
sboard.set(wb_set_sboard && wb_wen, wb_waddr)
// stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
val ex_cannot_bypass = ex_ctrl.csr =/= CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc
val data_hazard_ex = ex_ctrl.wxd && checkHazards(hazard_targets, _ === ex_waddr)
val fp_data_hazard_ex = ex_ctrl.wfd && checkHazards(fp_hazard_targets, _ === ex_waddr)
val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex)
// stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage.
val mem_mem_cmd_bh =
if (fastLoadWord) Bool(!fastLoadByte) && mem_reg_slow_bypass
else Bool(true)
val mem_cannot_bypass = mem_ctrl.csr =/= CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc
val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr)
val fp_data_hazard_mem = mem_ctrl.wfd && checkHazards(fp_hazard_targets, _ === mem_waddr)
val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem)
id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem
// stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback.
val data_hazard_wb = wb_ctrl.wxd && checkHazards(hazard_targets, _ === wb_waddr)
val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr)
val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb)
val id_stall_fpu = if (usingFPU) {
val fp_sboard = new Scoreboard(32)
fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr)
fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr)
fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra)
id_csr_en && !io.fpu.fcsr_rdy || checkHazards(fp_hazard_targets, fp_sboard.read _)
} else Bool(false)
val dcache_blocked = Reg(Bool())
dcache_blocked := !io.dmem.req.ready && (io.dmem.req.valid || dcache_blocked)
val rocc_blocked = Reg(Bool())
rocc_blocked := !wb_reg_xcpt && !io.rocc.cmd.ready && (io.rocc.cmd.valid || rocc_blocked)
val ctrl_stalld =
id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard ||
id_ctrl.fp && id_stall_fpu ||
id_ctrl.mem && dcache_blocked || // reduce activity during D$ misses
id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy
id_do_fence ||
csr.io.csr_stall
ctrl_killd := !ibuf.io.inst(0).valid || ibuf.io.inst(0).bits.replay || take_pc || ctrl_stalld || csr.io.interrupt
io.imem.req.valid := take_pc
io.imem.req.bits.speculative := !take_pc_wb
io.imem.req.bits.pc :=
Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
Mux(replay_wb, wb_reg_pc, // replay
mem_npc)) // mispredicted branch
io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
io.imem.flush_tlb := csr.io.fatc
ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt
io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && (mem_cfi_taken || !mem_cfi) && mem_wrong_npc)
io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi
io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00??1")
io.imem.btb_update.bits.target := io.imem.req.bits.pc
io.imem.btb_update.bits.br_pc := (if (usingCompressed) mem_reg_pc + Mux(mem_reg_rvc, UInt(0), UInt(2)) else mem_reg_pc)
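// align br_pc down to the fetch-packet boundary (x & ~mask) for the BTB index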
io.imem.btb_update.bits.pc := ~(~io.imem.btb_update.bits.br_pc | (coreInstBytes*fetchWidth-1))
io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
io.imem.bht_update.valid := mem_reg_valid && !take_pc_wb && mem_ctrl.branch
io.imem.bht_update.bits.pc := io.imem.btb_update.bits.pc
io.imem.bht_update.bits.taken := mem_br_taken
io.imem.bht_update.bits.mispredict := mem_wrong_npc
io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction
io.imem.ras_update.valid := mem_reg_valid && !take_pc_wb
io.imem.ras_update.bits.returnAddr := mem_int_wdata
io.imem.ras_update.bits.isCall := io.imem.btb_update.bits.isJump && mem_waddr(0)
io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn
io.imem.ras_update.bits.prediction := io.imem.btb_update.bits.prediction
io.fpu.valid := !ctrl_killd && id_ctrl.fp
io.fpu.killx := ctrl_killx
io.fpu.killm := killm_common
io.fpu.inst := id_inst(0)
io.fpu.fromint_data := ex_rs(0)
io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
io.fpu.dmem_resp_data := io.dmem.resp.bits.data_word_bypass
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
io.fpu.dmem_resp_tag := dmem_resp_waddr
io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
val ex_dcache_tag = Cat(ex_waddr, ex_ctrl.fp)
require(coreDCacheReqTagBits >= ex_dcache_tag.getWidth)
io.dmem.req.bits.tag := ex_dcache_tag
io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
io.dmem.req.bits.typ := ex_ctrl.mem_type
io.dmem.req.bits.phys := Bool(false)
io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out)
io.dmem.invalidate_lr := wb_xcpt
io.dmem.s1_data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
io.dmem.s1_kill := killm_common || mem_breakpoint
when (mem_xcpt && !io.dmem.s1_kill) {
assert(io.dmem.xcpt.asUInt.orR) // make sure s1_kill is exhaustive
}
io.rocc.cmd.valid := wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
io.rocc.exception := wb_xcpt && csr.io.status.xs.orR
io.rocc.cmd.bits.status := csr.io.status
io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst)
io.rocc.cmd.bits.rs1 := wb_reg_wdata
io.rocc.cmd.bits.rs2 := wb_reg_rs2
if (enableCommitLog) {
val pc = Wire(SInt(width=xLen))
pc := wb_reg_pc
val inst = wb_reg_inst
val rd = RegNext(RegNext(RegNext(id_waddr)))
val wfd = wb_ctrl.wfd
val wxd = wb_ctrl.wxd
val has_data = wb_wen && !wb_set_sboard
val priv = csr.io.status.prv
when (wb_valid) {
when (wfd) {
printf ("%d 0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd+UInt(32))
}
.elsewhen (wxd && rd =/= UInt(0) && has_data) {
printf ("%d 0x%x (0x%x) x%d 0x%x\n", priv, pc, inst, rd, rf_wdata)
}
.elsewhen (wxd && rd =/= UInt(0) && !has_data) {
printf ("%d 0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd)
}
.otherwise {
printf ("%d 0x%x (0x%x)\n", priv, pc, inst)
}
}
when (ll_wen && rf_waddr =/= UInt(0)) {
printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata)
}
}
else {
printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n",
io.prci.id, csr.io.time(31,0), wb_valid, wb_reg_pc,
Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen,
wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))),
wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))),
wb_reg_inst, wb_reg_inst)
}
def checkExceptions(x: Seq[(Bool, UInt)]) =
(x.map(_._1).reduce(_||_), PriorityMux(x))
def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) =
targets.map(h => h._1 && cond(h._2)).reduce(_||_)
def encodeVirtualAddress(a0: UInt, ea: UInt) = if (vaddrBitsExtended == vaddrBits) ea else {
// efficient means to compress 64-bit VA into vaddrBits+1 bits
// (VA is bad if VA(vaddrBits) != VA(vaddrBits-1))
val a = a0 >> vaddrBits-1
val e = ea(vaddrBits,vaddrBits-1).asSInt
val msb =
Mux(a === UInt(0) || a === UInt(1), e =/= SInt(0),
Mux(a.asSInt === SInt(-1) || a.asSInt === SInt(-2), e === SInt(-1), e(0)))
Cat(msb, ea(vaddrBits-1,0))
}
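// Illustrative only (assuming Sv39, vaddrBits = 39): a canonical user VA has
// bits 63..38 clear, so a === 0 and e === 0 give msb = 0; a canonical kernel
// VA has them all set (a === -1, e === -1) giving msb = 1. Other patterns
// force msb to disagree with bit vaddrBits-1, which marks the VA as bad.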
class Scoreboard(n: Int, zero: Boolean = false)
{
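// set and clear may each be called several times per cycle: every call
// rebuilds _next and re-emits when (ens) { _r := _next }, and last-connect
// semantics leave the register holding the union of all of this cycle's updates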
def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr))
def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr))
def read(addr: UInt): Bool = r(addr)
def readBypassed(addr: UInt): Bool = _next(addr)
private val _r = Reg(init=Bits(0, n))
private val r = if (zero) (_r >> 1 << 1) else _r
private var _next = r
private var ens = Bool(false)
private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0))
private def update(en: Bool, update: UInt) = {
_next = update
ens = ens || en
when (ens) { _r := _next }
}
}
}

View File

@ -0,0 +1,166 @@
package rocket
import Chisel._
import Chisel.ImplicitConversions._
import Util._
import cde.Parameters
import uncore.util._
class ExpandedInstruction extends Bundle {
val bits = UInt(width = 32)
val rd = UInt(width = 5)
val rs1 = UInt(width = 5)
val rs2 = UInt(width = 5)
val rs3 = UInt(width = 5)
}
class RVCDecoder(x: UInt)(implicit p: Parameters) {
def inst(bits: UInt, rd: UInt = x(11,7), rs1: UInt = x(19,15), rs2: UInt = x(24,20), rs3: UInt = x(31,27)) = {
val res = Wire(new ExpandedInstruction)
res.bits := bits
res.rd := rd
res.rs1 := rs1
res.rs2 := rs2
res.rs3 := rs3
res
}
def rs1p = Cat(UInt(1,2), x(9,7))
def rs2p = Cat(UInt(1,2), x(4,2))
def rs2 = x(6,2)
def rd = x(11,7)
def addi4spnImm = Cat(x(10,7), x(12,11), x(5), x(6), UInt(0,2))
def lwImm = Cat(x(5), x(12,10), x(6), UInt(0,2))
def ldImm = Cat(x(6,5), x(12,10), UInt(0,3))
def lwspImm = Cat(x(3,2), x(12), x(6,4), UInt(0,2))
def ldspImm = Cat(x(4,2), x(12), x(6,5), UInt(0,3))
def swspImm = Cat(x(8,7), x(12,9), UInt(0,2))
def sdspImm = Cat(x(9,7), x(12,10), UInt(0,3))
def luiImm = Cat(Fill(15, x(12)), x(6,2), UInt(0,12))
def addi16spImm = Cat(Fill(3, x(12)), x(4,3), x(5), x(2), x(6), UInt(0,4))
def addiImm = Cat(Fill(7, x(12)), x(6,2))
def jImm = Cat(Fill(10, x(12)), x(8), x(10,9), x(6), x(7), x(2), x(11), x(5,3), UInt(0,1))
def bImm = Cat(Fill(5, x(12)), x(6,5), x(2), x(11,10), x(4,3), UInt(0,1))
def shamt = Cat(x(12), x(6,2))
def x0 = UInt(0,5)
def ra = UInt(1,5)
def sp = UInt(2,5)
def q0 = {
def addi4spn = {
val opc = Mux(x(12,5).orR, UInt(0x13,7), UInt(0x1F,7))
inst(Cat(addi4spnImm, sp, UInt(0,3), rs2p, opc), rs2p, sp, rs2p)
}
def ld = inst(Cat(ldImm, rs1p, UInt(3,3), rs2p, UInt(0x03,7)), rs2p, rs1p, rs2p)
def lw = inst(Cat(lwImm, rs1p, UInt(2,3), rs2p, UInt(0x03,7)), rs2p, rs1p, rs2p)
def fld = inst(Cat(ldImm, rs1p, UInt(3,3), rs2p, UInt(0x07,7)), rs2p, rs1p, rs2p)
def flw = {
if (p(XLen) == 32) inst(Cat(lwImm, rs1p, UInt(2,3), rs2p, UInt(0x07,7)), rs2p, rs1p, rs2p)
else ld
}
def unimp = inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x2F,7)), rs2p, rs1p, rs2p)
def sd = inst(Cat(ldImm >> 5, rs2p, rs1p, UInt(3,3), ldImm(4,0), UInt(0x23,7)), rs2p, rs1p, rs2p)
def sw = inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x23,7)), rs2p, rs1p, rs2p)
def fsd = inst(Cat(ldImm >> 5, rs2p, rs1p, UInt(3,3), ldImm(4,0), UInt(0x27,7)), rs2p, rs1p, rs2p)
def fsw = {
if (p(XLen) == 32) inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x27,7)), rs2p, rs1p, rs2p)
else sd
}
Seq(addi4spn, fld, lw, flw, unimp, fsd, sw, fsw)
}
def q1 = {
def addi = inst(Cat(addiImm, rd, UInt(0,3), rd, UInt(0x13,7)), rd, rd, rs2p)
def addiw = {
val opc = Mux(rd.orR, UInt(0x1B,7), UInt(0x1F,7))
inst(Cat(addiImm, rd, UInt(0,3), rd, opc), rd, rd, rs2p)
}
def jal = {
if (p(XLen) == 32) inst(Cat(jImm(20), jImm(10,1), jImm(11), jImm(19,12), ra, UInt(0x6F,7)), ra, rd, rs2p)
else addiw
}
def li = inst(Cat(addiImm, x0, UInt(0,3), rd, UInt(0x13,7)), rd, x0, rs2p)
def addi16sp = {
val opc = Mux(addiImm.orR, UInt(0x13,7), UInt(0x1F,7))
inst(Cat(addi16spImm, rd, UInt(0,3), rd, opc), rd, rd, rs2p)
}
def lui = {
val opc = Mux(addiImm.orR, UInt(0x37,7), UInt(0x3F,7))
val me = inst(Cat(luiImm(31,12), rd, opc), rd, rd, rs2p)
Mux(rd === x0 || rd === sp, addi16sp, me)
}
def j = inst(Cat(jImm(20), jImm(10,1), jImm(11), jImm(19,12), x0, UInt(0x6F,7)), x0, rs1p, rs2p)
def beqz = inst(Cat(bImm(12), bImm(10,5), x0, rs1p, UInt(0,3), bImm(4,1), bImm(11), UInt(0x63,7)), rs1p, rs1p, x0)
def bnez = inst(Cat(bImm(12), bImm(10,5), x0, rs1p, UInt(1,3), bImm(4,1), bImm(11), UInt(0x63,7)), x0, rs1p, x0)
def arith = {
def srli = Cat(shamt, rs1p, UInt(5,3), rs1p, UInt(0x13,7))
def srai = srli | UInt(1 << 30)
def andi = Cat(addiImm, rs1p, UInt(7,3), rs1p, UInt(0x13,7))
def rtype = {
val funct = Seq(0.U, 4.U, 6.U, 7.U, 0.U, 0.U, 2.U, 3.U)(Cat(x(12), x(6,5)))
val sub = Mux(x(6,5) === UInt(0), UInt(1 << 30), UInt(0))
val opc = Mux(x(12), UInt(0x3B,7), UInt(0x33,7))
Cat(rs2p, rs1p, funct, rs1p, opc) | sub
}
inst(Seq(srli, srai, andi, rtype)(x(11,10)), rs1p, rs1p, rs2p)
}
Seq(addi, jal, li, lui, arith, j, beqz, bnez)
}
def q2 = {
def slli = inst(Cat(shamt, rd, UInt(1,3), rd, UInt(0x13,7)), rd, rd, rs2)
def ldsp = inst(Cat(ldspImm, sp, UInt(3,3), rd, UInt(0x03,7)), rd, sp, rs2)
def lwsp = inst(Cat(lwspImm, sp, UInt(2,3), rd, UInt(0x03,7)), rd, sp, rs2)
def fldsp = inst(Cat(ldspImm, sp, UInt(3,3), rd, UInt(0x07,7)), rd, sp, rs2)
def flwsp = {
if (p(XLen) == 32) inst(Cat(lwspImm, sp, UInt(2,3), rd, UInt(0x07,7)), rd, sp, rs2)
else ldsp
}
def sdsp = inst(Cat(sdspImm >> 5, rs2, sp, UInt(3,3), sdspImm(4,0), UInt(0x23,7)), rd, sp, rs2)
def swsp = inst(Cat(swspImm >> 5, rs2, sp, UInt(2,3), swspImm(4,0), UInt(0x23,7)), rd, sp, rs2)
def fsdsp = inst(Cat(sdspImm >> 5, rs2, sp, UInt(3,3), sdspImm(4,0), UInt(0x27,7)), rd, sp, rs2)
def fswsp = {
if (p(XLen) == 32) inst(Cat(swspImm >> 5, rs2, sp, UInt(2,3), swspImm(4,0), UInt(0x27,7)), rd, sp, rs2)
else sdsp
}
def jalr = {
val mv = inst(Cat(rs2, x0, UInt(0,3), rd, UInt(0x33,7)), rd, x0, rs2)
val add = inst(Cat(rs2, rd, UInt(0,3), rd, UInt(0x33,7)), rd, rd, rs2)
val jr = Cat(rs2, rd, UInt(0,3), x0, UInt(0x67,7))
val reserved = Cat(jr >> 7, UInt(0x1F,7))
val jr_reserved = inst(Mux(rd.orR, jr, reserved), x0, rd, rs2)
val jr_mv = Mux(rs2.orR, mv, jr_reserved)
val jalr = Cat(rs2, rd, UInt(0,3), ra, UInt(0x67,7))
val ebreak = Cat(jr >> 7, UInt(0x73,7)) | UInt(1 << 20)
val jalr_ebreak = inst(Mux(rd.orR, jalr, ebreak), ra, rd, rs2)
val jalr_add = Mux(rs2.orR, add, jalr_ebreak)
Mux(x(12), jalr_add, jr_mv)
}
Seq(slli, fldsp, lwsp, flwsp, jalr, fsdsp, swsp, fswsp)
}
def q3 = Seq.fill(8)(passthrough)
def passthrough = inst(x)
def decode = {
val s = q0 ++ q1 ++ q2 ++ q3
s(Cat(x(1,0), x(15,13)))
}
}
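// Worked example (illustrative, not in the original file): C.LI rd, imm sits
// in quadrant 1 with funct3 = 010, so decode indexes s(0b01010) = q1's li,
// which emits ADDI rd, x0, imm with imm = sext(x(12)) ## x(6,2).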
class RVCExpander(implicit p: Parameters) extends Module {
val io = new Bundle {
val in = UInt(INPUT, 32)
val out = new ExpandedInstruction
val rvc = Bool(OUTPUT)
}
if (p(UseCompressed)) {
io.rvc := io.in(1,0) =/= UInt(3)
io.out := new RVCDecoder(io.in).decode
} else {
io.rvc := Bool(false)
io.out := new RVCDecoder(io.in).passthrough
}
}
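// A minimal usage sketch (an assumption, not part of this commit): a fetch
// unit would expand each instruction parcel roughly like
//   val exp = Module(new RVCExpander)
//   exp.io.in := fetch_data(31, 0)
//   val inst = exp.io.out.bits // 32-bit instruction, expanded if exp.io.rvc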

View File

@ -0,0 +1,134 @@
// See LICENSE for license details.
package rocket
import Chisel._
import uncore.tilelink._
import uncore.agents._
import uncore.devices._
import Util._
import cde.{Parameters, Field}
case object BuildRoCC extends Field[Seq[RoccParameters]]
case object NCachedTileLinkPorts extends Field[Int]
case object NUncachedTileLinkPorts extends Field[Int]
case class RoccParameters(
opcodes: OpcodeSet,
generator: Parameters => RoCC,
nMemChannels: Int = 0,
nPTWPorts : Int = 0,
useFPU: Boolean = false)
abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null)
(implicit p: Parameters) extends Module(Option(clockSignal), Option(resetSignal)) {
val nCachedTileLinkPorts = p(NCachedTileLinkPorts)
val nUncachedTileLinkPorts = p(NUncachedTileLinkPorts)
val dcacheParams = p.alterPartial({ case CacheName => "L1D" })
class TileIO extends Bundle {
val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO)
val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO)
val prci = new PRCITileIO().flip
}
val io = new TileIO
}
class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null)
(implicit p: Parameters) extends Tile(clockSignal, resetSignal)(p) {
val buildRocc = p(BuildRoCC)
val usingRocc = !buildRocc.isEmpty
val nRocc = buildRocc.size
val nFPUPorts = buildRocc.filter(_.useFPU).size
val core = Module(new Rocket)
val icache = Module(new Frontend()(p.alterPartial({ case CacheName => "L1I" })))
val dcache = HellaCache(p(DCacheKey))(dcacheParams)
val ptwPorts = collection.mutable.ArrayBuffer(icache.io.ptw, dcache.ptw)
val dcPorts = collection.mutable.ArrayBuffer(core.io.dmem)
val uncachedArbPorts = collection.mutable.ArrayBuffer(icache.io.mem)
val uncachedPorts = collection.mutable.ArrayBuffer[ClientUncachedTileLinkIO]()
val cachedPorts = collection.mutable.ArrayBuffer(dcache.mem)
core.io.prci <> io.prci
icache.io.cpu <> core.io.imem
val fpuOpt = p(FPUKey).map(cfg => Module(new FPU(cfg)))
fpuOpt.foreach(fpu => core.io.fpu <> fpu.io)
if (usingRocc) {
val respArb = Module(new RRArbiter(new RoCCResponse, nRocc))
core.io.rocc.resp <> respArb.io.out
val roccOpcodes = buildRocc.map(_.opcodes)
val cmdRouter = Module(new RoccCommandRouter(roccOpcodes))
cmdRouter.io.in <> core.io.rocc.cmd
val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) =>
val rocc = accelParams.generator(p.alterPartial({
case RoccNMemChannels => accelParams.nMemChannels
case RoccNPTWPorts => accelParams.nPTWPorts
}))
val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams))
rocc.io.cmd <> cmdRouter.io.out(i)
rocc.io.exception := core.io.rocc.exception
dcIF.io.requestor <> rocc.io.mem
dcPorts += dcIF.io.cache
uncachedArbPorts += rocc.io.autl
rocc
}
if (nFPUPorts > 0) {
fpuOpt.foreach { fpu =>
val fpArb = Module(new InOrderArbiter(new FPInput, new FPResult, nFPUPorts))
val fp_roccs = roccs.zip(buildRocc)
.filter { case (_, params) => params.useFPU }
.map { case (rocc, _) => rocc.io }
fpArb.io.in_req <> fp_roccs.map(_.fpu_req)
fp_roccs.zip(fpArb.io.in_resp).foreach {
case (rocc, fpu_resp) => rocc.fpu_resp <> fpu_resp
}
fpu.io.cp_req <> fpArb.io.out_req
fpArb.io.out_resp <> fpu.io.cp_resp
}
}
core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _)
core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _)
respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp))
ptwPorts ++= roccs.flatMap(_.io.ptw)
uncachedPorts ++= roccs.flatMap(_.io.utl)
}
val uncachedArb = Module(new ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size))
uncachedArb.io.in <> uncachedArbPorts
uncachedArb.io.out +=: uncachedPorts
// Connect the caches and RoCC to the outer memory system
io.uncached <> uncachedPorts
io.cached <> cachedPorts
// TODO remove nCached/nUncachedTileLinkPorts parameters and these assertions
require(uncachedPorts.size == nUncachedTileLinkPorts)
require(cachedPorts.size == nCachedTileLinkPorts)
if (p(UseVM)) {
val ptw = Module(new PTW(ptwPorts.size)(dcacheParams))
ptw.io.requestor <> ptwPorts
ptw.io.mem +=: dcPorts
core.io.ptw <> ptw.io.dpath
}
require(dcPorts.size == core.dcacheArbPorts)
val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams))
dcArb.io.requestor <> dcPorts
dcache.cpu <> dcArb.io.mem
if (nFPUPorts == 0) {
fpuOpt.foreach { fpu =>
fpu.io.cp_req.valid := Bool(false)
fpu.io.cp_resp.ready := Bool(false)
}
}
}

View File

@ -0,0 +1,193 @@
// See LICENSE for license details.
package rocket
import Chisel._
import Util._
import junctions._
import scala.math._
import cde.{Parameters, Field}
import uncore.agents.PseudoLRU
import uncore.coherence._
import uncore.util._
case object PgLevels extends Field[Int]
case object ASIdBits extends Field[Int]
case object NTLBEntries extends Field[Int]
trait HasTLBParameters extends HasCoreParameters {
val entries = p(NTLBEntries)
val camAddrBits = log2Ceil(entries)
val camTagBits = asIdBits + vpnBits
}
class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
val vpn = UInt(width = vpnBitsExtended)
val passthrough = Bool()
val instruction = Bool()
val store = Bool()
}
class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
// lookup responses
val miss = Bool(OUTPUT)
val ppn = UInt(OUTPUT, ppnBits)
val xcpt_ld = Bool(OUTPUT)
val xcpt_st = Bool(OUTPUT)
val xcpt_if = Bool(OUTPUT)
val cacheable = Bool(OUTPUT)
}
class TLB(implicit val p: Parameters) extends Module with HasTLBParameters {
val io = new Bundle {
val req = Decoupled(new TLBReq).flip
val resp = new TLBResp
val ptw = new TLBPTWIO
}
val valid = Reg(init = UInt(0, entries))
val ppns = Reg(Vec(entries, UInt(width = ppnBits)))
val tags = Reg(Vec(entries, UInt(width = asIdBits + vpnBits)))
val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4)
val state = Reg(init=s_ready)
val r_refill_tag = Reg(UInt(width = asIdBits + vpnBits))
val r_refill_waddr = Reg(UInt(width = log2Ceil(entries)))
val r_req = Reg(new TLBReq)
val do_mprv = io.ptw.status.mprv && !io.req.bits.instruction
val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv)
val priv_s = priv === PRV.S
val priv_uses_vm = priv <= PRV.S && !io.ptw.status.debug
// share a single physical memory attribute checker (unshare if critical path)
val passthrough_ppn = io.req.bits.vpn(ppnBits-1, 0)
val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0)
val do_refill = Bool(usingVM) && io.ptw.resp.valid
val mpu_ppn = Mux(do_refill, refill_ppn, passthrough_ppn)
val prot = addrMap.getProt(mpu_ppn << pgIdxBits)
val cacheable = addrMap.isCacheable(mpu_ppn << pgIdxBits)
def pgaligned(r: MemRegion) = {
val pgsize = 1 << pgIdxBits
(r.start % pgsize) == 0 && (r.size % pgsize) == 0
}
require(addrMap.flatten.forall(e => pgaligned(e.region)),
"MemoryMap regions must be page-aligned")
val lookup_tag = Cat(io.ptw.ptbr.asid, io.req.bits.vpn(vpnBits-1,0))
val vm_enabled = Bool(usingVM) && io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough
val hitsVec = (0 until entries).map(i => valid(i) && vm_enabled && tags(i) === lookup_tag) :+ !vm_enabled
val hits = hitsVec.asUInt
// permission bit arrays
val pte_array = Reg(new PTE)
val u_array = Reg(UInt(width = entries)) // user permission
val sw_array = Reg(UInt(width = entries)) // write permission
val sx_array = Reg(UInt(width = entries)) // execute permission
val sr_array = Reg(UInt(width = entries)) // read permission
val xr_array = Reg(UInt(width = entries)) // read permission to executable page
val cash_array = Reg(UInt(width = entries)) // cacheable
val dirty_array = Reg(UInt(width = entries)) // PTE dirty bit
when (do_refill) {
val pte = io.ptw.resp.bits.pte
ppns(r_refill_waddr) := pte.ppn
tags(r_refill_waddr) := r_refill_tag
val mask = UIntToOH(r_refill_waddr)
valid := valid | mask
u_array := Mux(pte.u, u_array | mask, u_array & ~mask)
sw_array := Mux(pte.sw() && prot.w, sw_array | mask, sw_array & ~mask)
sx_array := Mux(pte.sx() && prot.x, sx_array | mask, sx_array & ~mask)
sr_array := Mux(pte.sr() && prot.r, sr_array | mask, sr_array & ~mask)
xr_array := Mux(pte.sx() && prot.r, xr_array | mask, xr_array & ~mask)
cash_array := Mux(cacheable, cash_array | mask, cash_array & ~mask)
dirty_array := Mux(pte.d, dirty_array | mask, dirty_array & ~mask)
}
val plru = new PseudoLRU(entries)
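// refill prefers a free (invalid) entry; only when all entries are valid
// does it fall back to the pseudo-LRU victim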
val repl_waddr = Mux(!valid.andR, PriorityEncoder(~valid), plru.replace)
val priv_ok = Mux(priv_s, ~Mux(io.ptw.status.pum, u_array, UInt(0)), u_array)
val w_array = Cat(prot.w, priv_ok & sw_array)
val x_array = Cat(prot.x, priv_ok & sx_array)
val r_array = Cat(prot.r, priv_ok & (sr_array | Mux(io.ptw.status.mxr, xr_array, UInt(0))))
val c_array = Cat(cacheable, cash_array)
val bad_va =
if (vpnBits == vpnBitsExtended) Bool(false)
else io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1)
// it's only a store hit if the dirty bit is set
val tlb_hits = hits(entries-1, 0) & (dirty_array | ~Mux(io.req.bits.store, w_array, UInt(0)))
val tlb_hit = tlb_hits.orR
val tlb_miss = vm_enabled && !bad_va && !tlb_hit
when (io.req.valid && !tlb_miss) {
plru.access(OHToUInt(hits(entries-1, 0)))
}
io.req.ready := state === s_ready
io.resp.xcpt_ld := bad_va || (~r_array & hits).orR
io.resp.xcpt_st := bad_va || (~w_array & hits).orR
io.resp.xcpt_if := bad_va || (~x_array & hits).orR
io.resp.cacheable := (c_array & hits).orR
io.resp.miss := do_refill || tlb_miss
io.resp.ppn := Mux1H(hitsVec, ppns :+ passthrough_ppn)
io.ptw.req.valid := state === s_request
io.ptw.req.bits := io.ptw.status
io.ptw.req.bits.addr := r_refill_tag
io.ptw.req.bits.store := r_req.store
io.ptw.req.bits.fetch := r_req.instruction
if (usingVM) {
when (io.req.fire() && tlb_miss) {
state := s_request
r_refill_tag := lookup_tag
r_refill_waddr := repl_waddr
r_req := io.req.bits
}
when (state === s_request) {
when (io.ptw.invalidate) {
state := s_ready
}
when (io.ptw.req.ready) {
state := s_wait
when (io.ptw.invalidate) { state := s_wait_invalidate }
}
}
when (state === s_wait && io.ptw.invalidate) {
state := s_wait_invalidate
}
when (io.ptw.resp.valid) {
state := s_ready
}
when (io.ptw.invalidate) {
valid := 0
}
}
}
class DecoupledTLB(implicit p: Parameters) extends Module {
val io = new Bundle {
val req = Decoupled(new TLBReq).flip
val resp = Decoupled(new TLBResp)
val ptw = new TLBPTWIO
}
val reqq = Queue(io.req)
val tlb = Module(new TLB)
val resp_helper = DecoupledHelper(
reqq.valid, tlb.io.req.ready, io.resp.ready)
val tlb_miss = tlb.io.resp.miss
tlb.io.req.valid := resp_helper.fire(tlb.io.req.ready)
tlb.io.req.bits := reqq.bits
reqq.ready := resp_helper.fire(reqq.valid, !tlb_miss)
io.resp.valid := resp_helper.fire(io.resp.ready, !tlb_miss)
io.resp.bits := tlb.io.resp
io.ptw <> tlb.io.ptw
}
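// Illustrative note on the DecoupledHelper pattern above: fire(x, extra*)
// ANDs every registered signal except x, plus any extras, so e.g.
// tlb.io.req.valid is reqq.valid && io.resp.ready, and the !tlb_miss extra
// term holds off both the dequeue and the response until the refill returns.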

View File

@ -0,0 +1,159 @@
// See LICENSE for license details.
package rocket
import Chisel._
import uncore.util._
import scala.math._
import cde.{Parameters, Field}
object Util {
implicit def uintToBitPat(x: UInt): BitPat = BitPat(x)
implicit def intToUInt(x: Int): UInt = UInt(x)
implicit def bigIntToUInt(x: BigInt): UInt = UInt(x)
implicit def booleanToBool(x: Boolean): Bits = Bool(x)
implicit def intSeqToUIntSeq(x: Seq[Int]): Seq[UInt] = x.map(UInt(_))
implicit def wcToUInt(c: WideCounter): UInt = c.value
implicit class UIntToAugmentedUInt(val x: UInt) extends AnyVal {
def sextTo(n: Int): UInt =
if (x.getWidth == n) x
else Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x)
def extract(hi: Int, lo: Int): UInt = {
if (hi == lo-1) UInt(0)
else x(hi, lo)
}
}
implicit def booleanToIntConv(x: Boolean) = new AnyRef {
def toInt: Int = if (x) 1 else 0
}
}
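// Illustrative examples (not in the original file): with these in scope,
// UInt("b1010").sextTo(8) yields "b11111010" (MSB replicated), while
// x.extract(3, 4) yields UInt(0), guarding against an empty hi < lo slice.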
import Util._
object Str
{
def apply(s: String): UInt = {
var i = BigInt(0)
require(s.forall(validChar _))
for (c <- s)
i = (i << 8) | c
UInt(i, s.length*8)
}
def apply(x: Char): UInt = {
require(validChar(x))
UInt(x.toInt, 8)
}
def apply(x: UInt): UInt = apply(x, 10)
def apply(x: UInt, radix: Int): UInt = {
val rad = UInt(radix)
val w = x.getWidth
require(w > 0)
var q = x
var s = digit(q % rad)
for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) {
q = q / rad
s = Cat(Mux(Bool(radix == 10) && q === UInt(0), Str(' '), digit(q % rad)), s)
}
s
}
def apply(x: SInt): UInt = apply(x, 10)
def apply(x: SInt, radix: Int): UInt = {
val neg = x < SInt(0)
val abs = x.abs
if (radix != 10) {
Cat(Mux(neg, Str('-'), Str(' ')), Str(abs, radix))
} else {
val rad = UInt(radix)
val w = abs.getWidth
require(w > 0)
var q = abs
var s = digit(q % rad)
var needSign = neg
for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) {
q = q / rad
val placeSpace = q === UInt(0)
val space = Mux(needSign, Str('-'), Str(' '))
needSign = needSign && !placeSpace
s = Cat(Mux(placeSpace, space, digit(q % rad)), s)
}
Cat(Mux(needSign, Str('-'), Str(' ')), s)
}
}
private def digit(d: UInt): UInt = Mux(d < UInt(10), Str('0')+d, Str(('a'-10).toChar)+d)(7,0)
private def validChar(x: Char) = x == (x & 0xFF)
}
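// Illustrative: Str("ok") packs the ASCII bytes into UInt(0x6F6B, 16), and
// Str(UInt(42)) renders the decimal digits "42", left-padded with spaces to
// the operand's worst-case width, which is handy inside printf.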
object Split
{
// is there a better way to do this?
def apply(x: Bits, n0: Int) = {
val w = checkWidth(x, n0)
(x(w-1,n0), x(n0-1,0))
}
def apply(x: Bits, n1: Int, n0: Int) = {
val w = checkWidth(x, n1, n0)
(x(w-1,n1), x(n1-1,n0), x(n0-1,0))
}
def apply(x: Bits, n2: Int, n1: Int, n0: Int) = {
val w = checkWidth(x, n2, n1, n0)
(x(w-1,n2), x(n2-1,n1), x(n1-1,n0), x(n0-1,0))
}
private def checkWidth(x: Bits, n: Int*) = {
val w = x.getWidth
def decreasing(x: Seq[Int]): Boolean =
if (x.tail.isEmpty) true
else x.head >= x.tail.head && decreasing(x.tail)
require(decreasing(w :: n.toList))
w
}
}
// a counter that clock gates most of its MSBs using the LSB carry-out
case class WideCounter(width: Int, inc: UInt = UInt(1))
{
private val isWide = width > 2*inc.getWidth
private val smallWidth = if (isWide) inc.getWidth max log2Up(width) else width
private val small = Reg(init=UInt(0, smallWidth))
private val nextSmall = small +& inc
small := nextSmall
private val large = if (isWide) {
val r = Reg(init=UInt(0, width - smallWidth))
when (nextSmall(smallWidth)) { r := r + UInt(1) }
r
} else null
val value = if (isWide) Cat(large, small) else small
def := (x: UInt) = {
small := x
if (isWide) large := x >> smallWidth
}
}
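// Illustrative: WideCounter(64) splits into a 6-bit low half (smallWidth =
// log2Up(64)) that increments every cycle and a 58-bit high half that only
// ticks on the low half's carry-out, keeping most MSB flops gated.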
object Random
{
def apply(mod: Int, random: UInt): UInt = {
if (isPow2(mod)) random(log2Up(mod)-1,0)
else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod))
}
def apply(mod: Int): UInt = apply(mod, randomizer)
def oneHot(mod: Int, random: UInt): UInt = {
if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0))
else PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod)).asUInt
}
def oneHot(mod: Int): UInt = oneHot(mod, randomizer)
private def randomizer = LFSR16()
private def round(x: Double): Int =
if (x.toInt.toDouble == x) x.toInt else (x.toInt + 1) & -2
private def partition(value: UInt, slices: Int) =
Seq.tabulate(slices)(i => value < round((i << value.getWidth).toDouble / slices))
}

View File

@ -0,0 +1,117 @@
package uncore
import Chisel._
import cde.{Config, Parameters, ParameterDump, Knob, Dump}
import junctions.PAddrBits
import uncore.tilelink._
import uncore.agents._
import uncore.coherence._
object UncoreBuilder extends App {
val topModuleName = args(0)
val configClassName = args(1)
val config = try {
Class.forName(s"uncore.$configClassName").newInstance.asInstanceOf[Config]
} catch {
case e: java.lang.ClassNotFoundException =>
throwException("Unable to find configClassName \"" + configClassName +
"\", did you misspell it?", e)
}
val world = config.toInstance
val paramsFromConfig: Parameters = Parameters.root(world)
val gen = () =>
Class.forName(s"uncore.$topModuleName")
.getConstructor(classOf[cde.Parameters])
.newInstance(paramsFromConfig)
.asInstanceOf[Module]
chiselMain.run(args.drop(2), gen)
val pdFile = new java.io.FileWriter(s"${Driver.targetDir}/$topModuleName.prm")
pdFile.write(ParameterDump.getDump)
pdFile.close
}
class DefaultL2Config extends Config (
topDefinitions = { (pname,site,here) =>
pname match {
case PAddrBits => 32
case CacheId => 0
case CacheName => "L2Bank"
case TLId => "L1toL2"
case InnerTLId => "L1toL2"
case OuterTLId => "L2toMC"
case "N_CACHED" => Dump("N_CACHED",here[Int]("CACHED_CLIENTS_PER_PORT"))
case "N_UNCACHED" => Dump("N_UNCACHED",here[Int]("MAX_CLIENTS_PER_PORT") - here[Int]("N_CACHED"))
case "MAX_CLIENT_XACTS" => 4
case "MAX_CLIENTS_PER_PORT" => Knob("NTILES")
case "CACHED_CLIENTS_PER_PORT" => Knob("N_CACHED_TILES")
case TLKey("L1toL2") =>
TileLinkParameters(
coherencePolicy = new MESICoherence(site(L2DirectoryRepresentation)),
nManagers = 1,
nCachingClients = here[Int]("N_CACHED"),
nCachelessClients = here[Int]("N_UNCACHED"),
maxClientXacts = here[Int]("MAX_CLIENT_XACTS"),
maxClientsPerPort = here[Int]("MAX_CLIENTS_PER_PORT"),
maxManagerXacts = site(NAcquireTransactors) + 2,
dataBits = site(CacheBlockBytes)*8,
dataBeats = 2)
case TLKey("L2toMC") =>
TileLinkParameters(
coherencePolicy = new MEICoherence(new NullRepresentation(1)),
nManagers = 1,
nCachingClients = 1,
nCachelessClients = 0,
maxClientXacts = 1,
maxClientsPerPort = site(NAcquireTransactors) + 2,
maxManagerXacts = 1,
dataBits = site(CacheBlockBytes)*8,
dataBeats = 2)
case CacheBlockBytes => 64
case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes))
case "L2_SETS" => Knob("L2_SETS")
case NSets => Dump("L2_SETS",here[Int]("L2_SETS"))
case NWays => Knob("L2_WAYS")
case RowBits => site(TLKey(site(TLId))).dataBitsPerBeat
case CacheIdBits => Dump("CACHE_ID_BITS",1)
case L2StoreDataQueueDepth => 1
case NAcquireTransactors => Dump("N_ACQUIRE_TRANSACTORS",2)
case NSecondaryMisses => 4
case L2DirectoryRepresentation => new FullRepresentation(here[Int]("N_CACHED"))
case L2Replacer => () => new SeqRandom(site(NWays))
case ECCCode => None
case AmoAluOperandBits => 64
case SplitMetadata => false
// case XLen => 128
}},
knobValues = {
case "L2_WAYS" => 1
case "L2_SETS" => 1024
case "NTILES" => 2
case "N_CACHED_TILES" => 2
case "L2_CAPACITY_IN_KB" => 256
}
)
class WithPLRU extends Config(
(pname, site, here) => pname match {
case L2Replacer => () => new SeqPLRU(site(NSets), site(NWays))
})
class PLRUL2Config extends Config(new WithPLRU ++ new DefaultL2Config)
class With1L2Ways extends Config(knobValues = { case "L2_WAYS" => 1 })
class With2L2Ways extends Config(knobValues = { case "L2_WAYS" => 2 })
class With4L2Ways extends Config(knobValues = { case "L2_WAYS" => 4 })
class With1Cached extends Config(knobValues = { case "N_CACHED_TILES" => 1 })
class With2Cached extends Config(knobValues = { case "N_CACHED_TILES" => 2 })
class W1Cached1WaysConfig extends Config(new With1L2Ways ++ new With1Cached ++ new DefaultL2Config)
class W1Cached2WaysConfig extends Config(new With2L2Ways ++ new With1Cached ++ new DefaultL2Config)
class W2Cached1WaysConfig extends Config(new With1L2Ways ++ new With2Cached ++ new DefaultL2Config)
class W2Cached2WaysConfig extends Config(new With2L2Ways ++ new With2Cached ++ new DefaultL2Config)
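// Further combinations compose the same way; for example (a sketch, not part
// of this commit):
// class PLRU4WaysConfig extends Config(new WithPLRU ++ new With4L2Ways ++ new DefaultL2Config)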

View File

@ -0,0 +1,39 @@
// See LICENSE for license details.
package uncore
package constants
import Chisel._
object MemoryOpConstants extends MemoryOpConstants
trait MemoryOpConstants {
val NUM_XA_OPS = 9
val M_SZ = 5
val M_X = BitPat("b?????")
val M_XRD = UInt("b00000") // int load
val M_XWR = UInt("b00001") // int store
val M_PFR = UInt("b00010") // prefetch with intent to read
val M_PFW = UInt("b00011") // prefetch with intent to write
val M_XA_SWAP = UInt("b00100")
val M_FLUSH_ALL = UInt("b00101") // flush all lines
val M_XLR = UInt("b00110")
val M_XSC = UInt("b00111")
val M_XA_ADD = UInt("b01000")
val M_XA_XOR = UInt("b01001")
val M_XA_OR = UInt("b01010")
val M_XA_AND = UInt("b01011")
val M_XA_MIN = UInt("b01100")
val M_XA_MAX = UInt("b01101")
val M_XA_MINU = UInt("b01110")
val M_XA_MAXU = UInt("b01111")
val M_FLUSH = UInt("b10000") // write back dirty data and cede R/W permissions
val M_PRODUCE = UInt("b10001") // write back dirty data and cede W permissions
val M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions
def isAMO(cmd: UInt) = cmd(3) || cmd === M_XA_SWAP
def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW
def isRead(cmd: UInt) = cmd === M_XRD || cmd === M_XLR || cmd === M_XSC || isAMO(cmd)
def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_XSC || isAMO(cmd)
def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR
}
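// Encoding sanity checks (illustrative): M_XA_ADD = "b01000" has bit 3 set,
// so isAMO is true; M_XLR = "b00110" satisfies both isRead and isWriteIntent,
// since a load-reserved must acquire the line with write permission.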

View File

@ -0,0 +1,4 @@
// See LICENSE for license details.
package uncore
package object constants extends uncore.constants.MemoryOpConstants

View File

@ -0,0 +1,162 @@
// See LICENSE for license details.
package uncore.agents
import Chisel._
import cde.{Parameters, Field}
import junctions._
import uncore.tilelink._
import uncore.converters._
import uncore.coherence._
import uncore.util._
case object NReleaseTransactors extends Field[Int]
case object NProbeTransactors extends Field[Int]
case object NAcquireTransactors extends Field[Int]
trait HasCoherenceAgentParameters {
implicit val p: Parameters
val nReleaseTransactors = 1
val nAcquireTransactors = p(NAcquireTransactors)
val nTransactors = nReleaseTransactors + nAcquireTransactors
val blockAddrBits = p(PAddrBits) - p(CacheBlockOffsetBits)
val outerTLId = p(OuterTLId)
val outerTLParams = p(TLKey(outerTLId))
val outerDataBeats = outerTLParams.dataBeats
val outerDataBits = outerTLParams.dataBitsPerBeat
val outerBeatAddrBits = log2Up(outerDataBeats)
val outerByteAddrBits = log2Up(outerDataBits/8)
val outerWriteMaskBits = outerTLParams.writeMaskBits
val innerTLId = p(InnerTLId)
val innerTLParams = p(TLKey(innerTLId))
val innerDataBeats = innerTLParams.dataBeats
val innerDataBits = innerTLParams.dataBitsPerBeat
val innerWriteMaskBits = innerTLParams.writeMaskBits
val innerBeatAddrBits = log2Up(innerDataBeats)
val innerByteAddrBits = log2Up(innerDataBits/8)
val innerNCachingClients = innerTLParams.nCachingClients
val maxManagerXacts = innerTLParams.maxManagerXacts
require(outerDataBeats == innerDataBeats) //TODO: fix all xact_data Vecs to remove this requirement
}
abstract class CoherenceAgentModule(implicit val p: Parameters) extends Module
with HasCoherenceAgentParameters
abstract class CoherenceAgentBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p)
with HasCoherenceAgentParameters
trait HasCoherenceAgentWiringHelpers {
def doOutputArbitration[T <: TileLinkChannel](
out: DecoupledIO[T],
ins: Seq[DecoupledIO[T]]) {
def lock(o: T) = o.hasMultibeatData()
val arb = Module(new LockingRRArbiter(out.bits, ins.size, out.bits.tlDataBeats, Some(lock _)))
out <> arb.io.out
arb.io.in <> ins
}
def doInputRouting[T <: Bundle with HasManagerTransactionId](
in: DecoupledIO[T],
outs: Seq[DecoupledIO[T]]) {
val idx = in.bits.manager_xact_id
outs.map(_.bits := in.bits)
outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && idx === UInt(i) }
in.ready := outs.map(_.ready).apply(idx)
}
/** Broadcasts valid messages on this channel to all trackers,
* but includes logic to allocate a new tracker in the case where
* no previously allocated tracker matches the new req's addr.
*
* When a match is reported, if ready is high the new transaction
* is merged; when ready is low the transaction is being blocked.
* When no match is reported, any high idles are presumed to be
* from trackers that are available for allocation, and one is
* assigned via alloc based on priority; if no idles are high then
* all trackers are busy with other transactions. If idle is high
* but ready is low, the tracker will be allocated but does not
* have sufficient buffering for the data.
*/
def doInputRoutingWithAllocation[T <: TileLinkChannel with HasTileLinkData](
in: DecoupledIO[T],
outs: Seq[DecoupledIO[T]],
allocs: Seq[TrackerAllocation],
dataOverrides: Option[Seq[UInt]] = None,
allocOverride: Option[Bool] = None,
matchOverride: Option[Bool] = None) {
val ready_bits = outs.map(_.ready).asUInt
val can_alloc_bits = allocs.map(_.can).asUInt
val should_alloc_bits = PriorityEncoderOH(can_alloc_bits)
val match_bits = allocs.map(_.matches).asUInt
val no_matches = !match_bits.orR
val alloc_ok = allocOverride.getOrElse(Bool(true))
val match_ok = matchOverride.getOrElse(Bool(true))
in.ready := (Mux(no_matches, can_alloc_bits, match_bits) & ready_bits).orR && alloc_ok && match_ok
outs.zip(allocs).zipWithIndex.foreach { case((out, alloc), i) =>
out.valid := in.valid && match_ok && alloc_ok
out.bits := in.bits
dataOverrides foreach { d => out.bits.data := d(i) }
alloc.should := should_alloc_bits(i) && no_matches && alloc_ok
}
}
}
trait HasInnerTLIO extends HasCoherenceAgentParameters {
val inner = new ManagerTileLinkIO()(p.alterPartial({case TLId => p(InnerTLId)}))
val incoherent = Vec(inner.tlNCachingClients, Bool()).asInput
def iacq(dummy: Int = 0) = inner.acquire.bits
def iprb(dummy: Int = 0) = inner.probe.bits
def irel(dummy: Int = 0) = inner.release.bits
def ignt(dummy: Int = 0) = inner.grant.bits
def ifin(dummy: Int = 0) = inner.finish.bits
}
trait HasUncachedOuterTLIO extends HasCoherenceAgentParameters {
val outer = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => p(OuterTLId)}))
def oacq(dummy: Int = 0) = outer.acquire.bits
def ognt(dummy: Int = 0) = outer.grant.bits
}
trait HasCachedOuterTLIO extends HasCoherenceAgentParameters {
val outer = new ClientTileLinkIO()(p.alterPartial({case TLId => p(OuterTLId)}))
def oacq(dummy: Int = 0) = outer.acquire.bits
def oprb(dummy: Int = 0) = outer.probe.bits
def orel(dummy: Int = 0) = outer.release.bits
def ognt(dummy: Int = 0) = outer.grant.bits
}
class ManagerTLIO(implicit p: Parameters) extends CoherenceAgentBundle()(p)
with HasInnerTLIO
with HasUncachedOuterTLIO
abstract class CoherenceAgent(implicit p: Parameters) extends CoherenceAgentModule()(p) {
def innerTL: ManagerTileLinkIO
def outerTL: ClientTileLinkIO
def incoherent: Vec[Bool]
}
abstract class ManagerCoherenceAgent(implicit p: Parameters) extends CoherenceAgent()(p)
with HasCoherenceAgentWiringHelpers {
val io = new ManagerTLIO
def innerTL = io.inner
def outerTL = TileLinkIOWrapper(io.outer)(p.alterPartial({case TLId => p(OuterTLId)}))
def incoherent = io.incoherent
}
class HierarchicalTLIO(implicit p: Parameters) extends CoherenceAgentBundle()(p)
with HasInnerTLIO
with HasCachedOuterTLIO
abstract class HierarchicalCoherenceAgent(implicit p: Parameters) extends CoherenceAgent()(p)
with HasCoherenceAgentWiringHelpers {
val io = new HierarchicalTLIO
def innerTL = io.inner
def outerTL = io.outer
def incoherent = io.incoherent
// TODO: Remove this function (and all its calls) when we support probing the L2
def disconnectOuterProbeAndFinish() {
io.outer.probe.ready := Bool(false)
io.outer.finish.valid := Bool(false)
assert(!io.outer.probe.valid, "L2 agent got illegal probe")
}
}

View File

@ -0,0 +1,204 @@
// See LICENSE for license details.
package uncore.agents
import Chisel._
import uncore.coherence._
import uncore.tilelink._
import uncore.constants._
import uncore.util._
import cde.Parameters
class L2BroadcastHub(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) {
// Create TSHRs for outstanding transactions
val irelTrackerList =
(0 until nReleaseTransactors).map(id =>
Module(new BufferedBroadcastVoluntaryReleaseTracker(id)))
val iacqTrackerList =
(nReleaseTransactors until nTransactors).map(id =>
Module(new BufferedBroadcastAcquireTracker(id)))
val trackerList = irelTrackerList ++ iacqTrackerList
// Propagate incoherence flags
trackerList.map(_.io.incoherent) foreach { _ := io.incoherent }
// Create an arbiter for the one memory port
val outerList = trackerList.map(_.io.outer)
val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size)
(p.alterPartial({ case TLId => p(OuterTLId) })))
outer_arb.io.in <> outerList
io.outer <> outer_arb.io.out
// Handle acquire transaction initiation
val irel_vs_iacq_conflict =
io.inner.acquire.valid &&
io.inner.release.valid &&
io.irel().conflicts(io.iacq())
doInputRoutingWithAllocation(
in = io.inner.acquire,
outs = trackerList.map(_.io.inner.acquire),
allocs = trackerList.map(_.io.alloc.iacq),
allocOverride = Some(!irel_vs_iacq_conflict))
// Handle releases, which might be voluntary and might have data
doInputRoutingWithAllocation(
in = io.inner.release,
outs = trackerList.map(_.io.inner.release),
allocs = trackerList.map(_.io.alloc.irel))
// Wire probe requests and grant reply to clients, finish acks from clients
doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe))
doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant))
doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
disconnectOuterProbeAndFinish()
}
class BroadcastXactTracker(implicit p: Parameters) extends XactTracker()(p) {
val io = new HierarchicalXactTrackerIO
pinAllReadyValidLow(io)
}
trait BroadcastsToAllClients extends HasCoherenceAgentParameters {
val coh = HierarchicalMetadata.onReset
val inner_coh = coh.inner
val outer_coh = coh.outer
def full_representation = ~UInt(0, width = innerNCachingClients)
}
abstract class BroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
extends VoluntaryReleaseTracker(trackerId)(p)
with EmitsVoluntaryReleases
with BroadcastsToAllClients {
val io = new HierarchicalXactTrackerIO
pinAllReadyValidLow(io)
// Checks for illegal behavior
assert(!(state === s_idle && io.inner.release.fire() && io.alloc.irel.should && !io.irel().isVoluntary()),
"VoluntaryReleaseTracker accepted Release that wasn't voluntary!")
}
abstract class BroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters)
extends AcquireTracker(trackerId)(p)
with EmitsVoluntaryReleases
with BroadcastsToAllClients {
val io = new HierarchicalXactTrackerIO
pinAllReadyValidLow(io)
val alwaysWriteFullBeat = false
val nSecondaryMisses = 1
def iacq_can_merge = Bool(false)
// Checks for illegal behavior
// TODO: this could be allowed, but is a useful check against allocation gone wild
assert(!(state === s_idle && io.inner.acquire.fire() && io.alloc.iacq.should &&
io.iacq().hasMultibeatData() && !io.iacq().first()),
"AcquireTracker initialized with a tail data beat.")
assert(!(state =/= s_idle && pending_ignt && xact_iacq.isPrefetch()),
"Broadcast Hub does not support Prefetches.")
assert(!(state =/= s_idle && pending_ignt && xact_iacq.isAtomic()),
"Broadcast Hub does not support PutAtomics.")
}
class BufferedBroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
extends BroadcastVoluntaryReleaseTracker(trackerId)(p)
with HasDataBuffer {
// Tell the parent if any incoming messages conflict with the ongoing transaction
routeInParent(irelCanAlloc = Bool(true))
// Start transaction by accepting inner release
innerRelease(block_vol_ignt = pending_orel || vol_ognt_counter.pending)
// A release beat can be accepted if we are idle, if it's a mergeable transaction, or if it's a tail beat
io.inner.release.ready := state === s_idle || irel_can_merge || irel_same_xact
when(io.inner.release.fire()) { data_buffer(io.irel().addr_beat) := io.irel().data }
// Dispatch outer release
outerRelease(
coh = outer_coh.onHit(M_XWR),
data = data_buffer(vol_ognt_counter.up.idx),
add_pending_send_bit = irel_is_allocating)
quiesce() {}
}
class BufferedBroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters)
extends BroadcastAcquireTracker(trackerId)(p)
with HasByteWriteMaskBuffer {
// Setup IOs used for routing in the parent
routeInParent(iacqCanAlloc = Bool(true))
// First, take care of accepting new acquires or secondary misses
// Handling of primary and secondary misses' data and write mask merging
innerAcquire(
can_alloc = Bool(false),
next = s_inner_probe)
io.inner.acquire.ready := state === s_idle || iacq_can_merge || iacq_same_xact_multibeat
// Track which clients still need to be probed and make the Probe message
// If a writeback occurs, we can forward its data via the buffer,
// and skip having to go outwards
val skip_outer_acquire = pending_ignt_data.andR
innerProbe(
inner_coh.makeProbe(curr_probe_dst, xact_iacq, xact_addr_block),
Mux(!skip_outer_acquire, s_outer_acquire, s_busy))
// Handle incoming releases from clients, which may reduce sharer counts
// and/or write back dirty data, and may be unexpected voluntary releases
def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
io.irel().isVoluntary() &&
!state.isOneOf(s_idle, s_meta_write) &&
!all_pending_done &&
!io.outer.grant.fire() &&
!io.inner.grant.fire() &&
!vol_ignt_counter.pending &&
!blockInnerRelease()
innerRelease(block_vol_ignt = vol_ognt_counter.pending)
//TODO: accept vol irels when state === s_idle, operate like the VolRelTracker
io.inner.release.ready := irel_can_merge || irel_same_xact
mergeDataInner(io.inner.release)
// If there was a writeback, forward it outwards
outerRelease(
coh = outer_coh.onHit(M_XWR),
data = data_buffer(vol_ognt_counter.up.idx))
// Send outer request for miss
outerAcquire(
caching = !xact_iacq.isBuiltInType(),
coh = outer_coh,
data = data_buffer(ognt_counter.up.idx),
wmask = wmask_buffer(ognt_counter.up.idx),
next = s_busy)
// Handle the response from outer memory
mergeDataOuter(io.outer.grant)
// Acknowledge or respond with data
innerGrant(
data = data_buffer(ignt_data_idx),
external_pending = pending_orel || ognt_counter.pending || vol_ognt_counter.pending)
when(iacq_is_allocating) {
initializeProbes()
}
initDataInner(io.inner.acquire, iacq_is_allocating || iacq_is_merging)
// Wait for everything to quiesce
quiesce() { clearWmaskBuffer() }
}

View File

@ -0,0 +1,162 @@
// See LICENSE for license details.
package uncore.agents
import Chisel._
import uncore.coherence._
import uncore.tilelink._
import uncore.constants._
import cde.Parameters
class BufferlessBroadcastHub(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) {
// Create TSHRs for outstanding transactions
val irelTrackerList =
(0 until nReleaseTransactors).map(id =>
Module(new BufferlessBroadcastVoluntaryReleaseTracker(id)))
val iacqTrackerList =
(nReleaseTransactors until nTransactors).map(id =>
Module(new BufferlessBroadcastAcquireTracker(id)))
val trackerList = irelTrackerList ++ iacqTrackerList
// Propagate incoherence flags
trackerList.map(_.io.incoherent) foreach { _ := io.incoherent }
// Create an arbiter for the one memory port
val outerList = trackerList.map(_.io.outer)
val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size)
(p.alterPartial({ case TLId => p(OuterTLId) })))
outer_arb.io.in <> outerList
io.outer <> outer_arb.io.out
val iacq = Queue(io.inner.acquire, 1, pipe=true)
val irel = Queue(io.inner.release, 1, pipe=true)
// Handle acquire transaction initiation
val irel_vs_iacq_conflict =
iacq.valid &&
irel.valid &&
irel.bits.conflicts(iacq.bits)
doInputRoutingWithAllocation(
in = iacq,
outs = trackerList.map(_.io.inner.acquire),
allocs = trackerList.map(_.io.alloc.iacq),
allocOverride = Some(!irel_vs_iacq_conflict))
io.outer.acquire.bits.data := iacq.bits.data
when (io.oacq().hasData()) {
io.outer.acquire.bits.addr_beat := iacq.bits.addr_beat
}
// Handle releases, which might be voluntary and might have data
doInputRoutingWithAllocation(
in = irel,
outs = trackerList.map(_.io.inner.release),
allocs = trackerList.map(_.io.alloc.irel))
io.outer.release.bits.data := irel.bits.data
when (io.orel().hasData()) {
io.outer.release.bits.addr_beat := irel.bits.addr_beat
}
// Wire probe requests and grant reply to clients, finish acks from clients
doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe))
doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant))
io.inner.grant.bits.data := io.outer.grant.bits.data
io.inner.grant.bits.addr_beat := io.outer.grant.bits.addr_beat
doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
disconnectOuterProbeAndFinish()
}
class BufferlessBroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
extends BroadcastVoluntaryReleaseTracker(trackerId)(p) {
// Tell the parent if any incoming messages conflict with the ongoing transaction
routeInParent(irelCanAlloc = Bool(true))
// Start transaction by accepting inner release
innerRelease(block_vol_ignt = pending_orel || vol_ognt_counter.pending)
// A release beat can be accepted if we are idle, if it's a mergeable transaction, or if it's a tail beat,
// and if the outer release path is clear
io.inner.release.ready := Mux(io.irel().hasData(),
(state =/= s_idle) && (irel_can_merge || irel_same_xact) && io.outer.release.ready,
(state === s_idle) || irel_can_merge || irel_same_xact)
// Dispatch outer release
outerRelease(coh = outer_coh.onHit(M_XWR), buffering = Bool(false))
quiesce() {}
}
class BufferlessBroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters)
extends BroadcastAcquireTracker(trackerId)(p) {
// Setup IOs used for routing in the parent
routeInParent(iacqCanAlloc = Bool(true))
// First, take care of accepting new acquires or secondary misses
// Handling of primary and secondary misses' data and write mask merging
innerAcquire(
can_alloc = Bool(false),
next = s_inner_probe)
// We are never going to merge anything in the bufferless hub
// Therefore, we only need to concern ourselves with the allocated
// transaction and (in case of PutBlock) subsequent tail beats
val iacq_can_forward = iacq_same_xact && !vol_ognt_counter.pending
io.inner.acquire.ready := Mux(io.iacq().hasData(),
state === s_outer_acquire && iacq_can_forward && io.outer.acquire.ready,
state === s_idle && io.alloc.iacq.should)
// Track which clients still need to be probed and make the Probe message
innerProbe(
inner_coh.makeProbe(curr_probe_dst, xact_iacq, xact_addr_block),
s_outer_acquire)
// Handle incoming releases from clients, which may reduce sharer counts
// and/or write back dirty data, and may be unexpected voluntary releases
def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
io.irel().isVoluntary() &&
!vol_ignt_counter.pending &&
!(io.irel().hasData() && ognt_counter.pending) &&
(state =/= s_idle)
innerRelease(block_vol_ignt = vol_ognt_counter.pending)
val irel_could_accept = irel_can_merge || irel_same_xact
io.inner.release.ready := irel_could_accept &&
(!io.irel().hasData() || io.outer.release.ready)
// If there was a writeback, forward it outwards
outerRelease(
coh = outer_coh.onHit(M_XWR),
buffering = Bool(false),
block_orel = !irel_could_accept)
// Send outer request for miss
outerAcquire(
caching = !xact_iacq.isBuiltInType(),
block_outer_acquire = vol_ognt_counter.pending,
buffering = Bool(false),
coh = outer_coh,
next = s_busy)
// Handle the response from outer memory
when (ognt_counter.pending && io.ognt().hasData()) {
io.outer.grant.ready := io.inner.grant.ready // bypass data
}
// Acknowledge or respond with data
innerGrant(
external_pending = pending_orel || vol_ognt_counter.pending,
buffering = Bool(false))
when(iacq_is_allocating) { initializeProbes() }
// Wait for everything to quiesce
quiesce() {}
}
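// A plain-Scala sketch of the flow-through handshake the bufferless trackers
// implement above (illustrative only, not part of the hardware): with no data
// buffer, an inner beat carrying data may only be accepted in the same cycle
// the outer port can take it, so the inner ready is ANDed with the outer ready.
object FlowThroughHandshakeSketch extends App {
  def innerReady(trackerWilling: Boolean, hasData: Boolean, outerReady: Boolean): Boolean =
    trackerWilling && (!hasData || outerReady)
  assert(innerReady(trackerWilling = true, hasData = false, outerReady = false)) // no data: accept freely
  assert(!innerReady(trackerWilling = true, hasData = true, outerReady = false)) // data but outer stalled: stall
  assert(innerReady(trackerWilling = true, hasData = true, outerReady = true))   // data and outer ready: forward
}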

File diff suppressed because it is too large

View File

@ -0,0 +1,146 @@
// See LICENSE for license details.
package uncore.agents
import Chisel._
import uncore.util._
abstract class Decoding
{
def uncorrected: UInt
def corrected: UInt
def correctable: Bool
def uncorrectable: Bool
def error = correctable || uncorrectable
}
abstract class Code
{
def width(w0: Int): Int
def encode(x: UInt): UInt
def decode(x: UInt): Decoding
}
class IdentityCode extends Code
{
def width(w0: Int) = w0
def encode(x: UInt) = x
def decode(y: UInt) = new Decoding {
def uncorrected = y
def corrected = y
def correctable = Bool(false)
def uncorrectable = Bool(false)
}
}
class ParityCode extends Code
{
def width(w0: Int) = w0+1
def encode(x: UInt) = Cat(x.xorR, x)
def decode(y: UInt) = new Decoding {
def uncorrected = y(y.getWidth-2,0)
def corrected = uncorrected
def correctable = Bool(false)
def uncorrectable = y.xorR
}
}
class SECCode extends Code
{
def width(k: Int) = {
val m = log2Floor(k) + 1
k + m + (if((1 << m) < m+k+1) 1 else 0)
}
def encode(x: UInt) = {
val k = x.getWidth
require(k > 0)
val n = width(k)
val y = for (i <- 1 to n) yield {
if (isPow2(i)) {
val r = for (j <- 1 to n; if j != i && (j & i) != 0)
yield x(mapping(j))
r reduce (_^_)
} else
x(mapping(i))
}
y.asUInt
}
def decode(y: UInt) = new Decoding {
val n = y.getWidth
require(n > 0 && !isPow2(n))
val p2 = for (i <- 0 until log2Up(n)) yield 1 << i
val syndrome = (p2 map { i =>
val r = for (j <- 1 to n; if (j & i) != 0)
yield y(j-1)
r reduce (_^_)
}).asUInt
private def swizzle(z: UInt) = (1 to n).filter(i => !isPow2(i)).map(i => z(i-1)).asUInt
def uncorrected = swizzle(y)
def corrected = swizzle(((y << 1) ^ UIntToOH(syndrome)) >> 1)
def correctable = syndrome.orR
def uncorrectable = Bool(false)
}
private def mapping(i: Int) = i-1-log2Up(i)
}
class SECDEDCode extends Code
{
private val sec = new SECCode
private val par = new ParityCode
def width(k: Int) = sec.width(k)+1
def encode(x: UInt) = par.encode(sec.encode(x))
def decode(x: UInt) = new Decoding {
val secdec = sec.decode(x(x.getWidth-2,0))
val pardec = par.decode(x)
def uncorrected = secdec.uncorrected
def corrected = secdec.corrected
def correctable = pardec.uncorrectable
def uncorrectable = !pardec.uncorrectable && secdec.correctable
}
}
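// A plain-Scala sketch of the width arithmetic implemented by SECCode and
// SECDEDCode above (illustrative only): m = floor(log2(k)) + 1 check bits,
// plus one more if 2^m cannot cover all m + k + 1 syndromes, plus one overall
// parity bit for SECDED.
object CodeWidthSketch extends App {
  def log2Floor(x: Int): Int = 31 - Integer.numberOfLeadingZeros(x)
  def secWidth(k: Int): Int = {
    val m = log2Floor(k) + 1
    k + m + (if ((1 << m) < m + k + 1) 1 else 0)
  }
  def secdedWidth(k: Int): Int = secWidth(k) + 1
  assert(secWidth(4) == 7)      // the classic Hamming(7,4) code
  assert(secdedWidth(4) == 8)   // extended Hamming(8,4)
  assert(secdedWidth(64) == 72) // the usual 64-bit-data, 72-bit-word ECC geometry
}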
object ErrGen
{
// generate a 1-bit error with approximate probability 2^-f
def apply(width: Int, f: Int): UInt = {
require(width > 0 && f >= 0 && log2Up(width) + f <= 16)
UIntToOH(LFSR16()(log2Up(width)+f-1,0))(width-1,0)
}
def apply(x: UInt, f: Int): UInt = x ^ apply(x.getWidth, f)
}
class SECDEDTest extends Module
{
val code = new SECDEDCode
val k = 4
val n = code.width(k)
val io = new Bundle {
val original = Bits(OUTPUT, k)
val encoded = Bits(OUTPUT, n)
val injected = Bits(OUTPUT, n)
val uncorrected = Bits(OUTPUT, k)
val corrected = Bits(OUTPUT, k)
val correctable = Bool(OUTPUT)
val uncorrectable = Bool(OUTPUT)
}
val c = Counter(Bool(true), 1 << k)
val numErrors = Counter(c._2, 3)._1
val e = code.encode(c._1)
val i = e ^ Mux(numErrors < UInt(1), UInt(0), ErrGen(n, 1)) ^ Mux(numErrors < UInt(2), UInt(0), ErrGen(n, 1))
val d = code.decode(i)
io.original := c._1
io.encoded := e
io.injected := i
io.uncorrected := d.uncorrected
io.corrected := d.corrected
io.correctable := d.correctable
io.uncorrectable := d.uncorrectable
}

View File

@ -0,0 +1,73 @@
package uncore.agents
import Chisel._
import uncore.tilelink._
import cde.Parameters
class MMIOTileLinkManagerData(implicit p: Parameters)
extends TLBundle()(p)
with HasClientId
with HasClientTransactionId
class MMIOTileLinkManager(implicit p: Parameters)
extends CoherenceAgentModule()(p) {
val io = new ManagerTLIO
// MMIO requests should never need probe or release
io.inner.probe.valid := Bool(false)
io.inner.release.ready := Bool(false)
val multibeat_fire = io.outer.acquire.fire() && io.oacq().hasMultibeatData()
val multibeat_start = multibeat_fire && io.oacq().addr_beat === UInt(0)
val multibeat_end = multibeat_fire && io.oacq().addr_beat === UInt(outerDataBeats - 1)
// Acquire and Grant are basically pass-through,
// except client_id and client_xact_id need to be converted.
// Associate the inner client_id and client_xact_id
// with the outer client_xact_id.
val xact_pending = Reg(init = UInt(0, maxManagerXacts))
val xact_id_sel = PriorityEncoder(~xact_pending)
val xact_id_reg = RegEnable(xact_id_sel, multibeat_start)
val xact_multibeat = Reg(init = Bool(false))
val outer_xact_id = Mux(xact_multibeat, xact_id_reg, xact_id_sel)
val xact_free = !xact_pending.andR
val xact_buffer = Reg(Vec(maxManagerXacts, new MMIOTileLinkManagerData))
io.inner.acquire.ready := io.outer.acquire.ready && xact_free
io.outer.acquire.valid := io.inner.acquire.valid && xact_free
io.outer.acquire.bits := io.inner.acquire.bits
io.outer.acquire.bits.client_xact_id := outer_xact_id
def isLastBeat[T <: TileLinkChannel with HasTileLinkBeatId](in: T): Bool =
!in.hasMultibeatData() || in.addr_beat === UInt(outerDataBeats - 1)
def addPendingBitOnAcq[T <: AcquireMetadata](in: DecoupledIO[T]): UInt =
Mux(in.fire() && isLastBeat(in.bits), UIntToOH(in.bits.client_xact_id), UInt(0))
def clearPendingBitOnGnt[T <: GrantMetadata](in: DecoupledIO[T]): UInt =
~Mux(in.fire() && isLastBeat(in.bits) && !in.bits.requiresAck(),
UIntToOH(in.bits.manager_xact_id), UInt(0))
def clearPendingBitOnFin(in: DecoupledIO[Finish]): UInt =
~Mux(in.fire(), UIntToOH(in.bits.manager_xact_id), UInt(0))
xact_pending := (xact_pending | addPendingBitOnAcq(io.outer.acquire)) &
clearPendingBitOnFin(io.inner.finish) &
clearPendingBitOnGnt(io.inner.grant)
when (io.outer.acquire.fire() && isLastBeat(io.outer.acquire.bits)) {
xact_buffer(outer_xact_id) := io.iacq()
}
when (multibeat_start) { xact_multibeat := Bool(true) }
when (multibeat_end) { xact_multibeat := Bool(false) }
val gnt_xact = xact_buffer(io.ognt().client_xact_id)
io.outer.grant.ready := io.inner.grant.ready
io.inner.grant.valid := io.outer.grant.valid
io.inner.grant.bits := io.outer.grant.bits
io.inner.grant.bits.client_id := gnt_xact.client_id
io.inner.grant.bits.client_xact_id := gnt_xact.client_xact_id
io.inner.grant.bits.manager_xact_id := io.ognt().client_xact_id
io.inner.finish.ready := Bool(true)
}
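// A plain-Scala model of the xact_pending bookkeeping above (illustrative
// only): outstanding transactions live in a bitmap, a free slot is picked by
// priority-encoding the complemented mask, and the bit is cleared when the
// matching Grant or Finish retires.
object XactBitmapSketch extends App {
  var pending = 0                                   // models xact_pending
  def priorityEncoder(x: Int): Int = Integer.numberOfTrailingZeros(x)
  def allocate(): Int = {
    val id = priorityEncoder(~pending)              // models xact_id_sel
    pending |= (1 << id)                            // models addPendingBitOnAcq
    id
  }
  def retire(id: Int): Unit = pending &= ~(1 << id) // models clearPendingBitOnGnt/Fin
  val a = allocate(); val b = allocate()
  assert(a == 0 && b == 1)
  retire(a)
  assert(allocate() == 0)                           // a freed slot is reused first
}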

View File

@ -0,0 +1,69 @@
// See LICENSE for license details.
package uncore.agents
import Chisel._
import uncore.coherence._
import uncore.tilelink._
import uncore.constants._
import uncore.devices._
import cde.{Parameters, Field, Config}
/** The ManagerToClientStateless Bridge does not maintain any state for the messages
* which pass through it. It simply passes the messages back and forth without any
* tracking or translation.
*
* This can reduce area and timing in very constrained situations:
* - The Manager and Client implement the same coherence protocol
* - There are no probe or finish messages.
* - The outer transaction ID is large enough to handle all possible inner
* transaction IDs, such that no remapping state must be maintained.
*
* This bridge DOES NOT keep the uncached channel coherent with the cached
* channel. Uncached requests to blocks cached by the L1 will not probe the L1.
* As a result, uncached reads to cached blocks will get stale data until
* the L1 performs a voluntary writeback, and uncached writes to cached blocks
* will get lost, as the voluntary writeback from the L1 will overwrite the
* changes. If your tile relies on probing the L1 data cache in order to
* share data between the instruction cache and data cache (e.g. you are using
* a non-blocking L1 D$) or if the tile has uncached channels capable of
* writes (e.g. Hwacha and other RoCC accelerators), DO NOT USE THIS BRIDGE.
*/
class ManagerToClientStatelessBridge(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) {
val icid = io.inner.tlClientIdBits
val ixid = io.inner.tlClientXactIdBits
val oxid = io.outer.tlClientXactIdBits
val innerCoh = io.inner.tlCoh.getClass
val outerCoh = io.outer.tlCoh.getClass
// Stateless Bridge is only usable in certain constrained situations.
// Sanity check its usage here.
require(io.inner.tlNCachingClients <= 1)
require(icid + ixid <= oxid)
require(innerCoh eq outerCoh,
s"Coherence policies do not match: inner is ${innerCoh.getSimpleName}, outer is ${outerCoh.getSimpleName}")
io.outer.acquire.valid := io.inner.acquire.valid
io.inner.acquire.ready := io.outer.acquire.ready
io.outer.acquire.bits := io.inner.acquire.bits
io.outer.acquire.bits.client_xact_id := Cat(io.inner.acquire.bits.client_id, io.inner.acquire.bits.client_xact_id)
io.outer.release.valid := io.inner.release.valid
io.inner.release.ready := io.outer.release.ready
io.outer.release.bits := io.inner.release.bits
io.outer.release.bits.client_xact_id := Cat(io.inner.release.bits.client_id, io.inner.release.bits.client_xact_id)
io.inner.grant.valid := io.outer.grant.valid
io.outer.grant.ready := io.inner.grant.ready
io.inner.grant.bits := io.outer.grant.bits
io.inner.grant.bits.client_xact_id := io.outer.grant.bits.client_xact_id(ixid-1, 0)
io.inner.grant.bits.client_id := io.outer.grant.bits.client_xact_id(icid+ixid-1, ixid)
io.inner.probe.valid := Bool(false)
io.inner.finish.ready := Bool(true)
disconnectOuterProbeAndFinish()
}
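// A plain-Scala sketch of the client_xact_id packing performed by the bridge
// above (the widths here are arbitrary examples; the require in the bridge
// demands icid + ixid <= oxid): the inner client_id and client_xact_id are
// concatenated outbound and sliced apart again on the returning Grant.
object StatelessBridgeIdSketch extends App {
  val icid = 2; val ixid = 3
  def pack(clientId: Int, xactId: Int): Int = (clientId << ixid) | xactId
  def unpackXact(outer: Int): Int = outer & ((1 << ixid) - 1)
  def unpackClient(outer: Int): Int = (outer >> ixid) & ((1 << icid) - 1)
  val outerId = pack(clientId = 2, xactId = 5)
  assert(unpackClient(outerId) == 2 && unpackXact(outerId) == 5) // round-trips losslessly
}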

View File

@ -0,0 +1,119 @@
// See LICENSE for license details.
package uncore.agents
import Chisel._
import uncore.tilelink._
import cde.{Parameters, Field}
case object L2StoreDataQueueDepth extends Field[Int]
trait HasStoreDataQueueParameters extends HasCoherenceAgentParameters {
val sdqDepth = p(L2StoreDataQueueDepth)*innerDataBeats
val dqIdxBits = math.max(log2Up(nReleaseTransactors) + 1, log2Up(sdqDepth))
val nDataQueueLocations = 3 //Stores, VoluntaryWBs, Releases
}
class DataQueueLocation(implicit p: Parameters) extends CoherenceAgentBundle()(p)
with HasStoreDataQueueParameters {
val idx = UInt(width = dqIdxBits)
val loc = UInt(width = log2Ceil(nDataQueueLocations))
}
object DataQueueLocation {
def apply(idx: UInt, loc: UInt)(implicit p: Parameters) = {
val d = Wire(new DataQueueLocation)
d.idx := idx
d.loc := loc
d
}
}
trait HasStoreDataQueue extends HasStoreDataQueueParameters {
val io: HierarchicalTLIO
val trackerIOsList: Seq[HierarchicalXactTrackerIO]
val internalDataBits = new DataQueueLocation().getWidth
val inStoreQueue :: inVolWBQueue :: inClientReleaseQueue :: Nil = Enum(UInt(), nDataQueueLocations)
val usingStoreDataQueue = p.alterPartial({
case TLKey(`innerTLId`) => innerTLParams.copy(overrideDataBitsPerBeat = Some(internalDataBits))
case TLKey(`outerTLId`) => outerTLParams.copy(overrideDataBitsPerBeat = Some(internalDataBits))
})
// Queue to store impending Put data
lazy val sdq = Reg(Vec(sdqDepth, io.iacq().data))
lazy val sdq_val = Reg(init=Bits(0, sdqDepth))
lazy val sdq_alloc_id = PriorityEncoder(~sdq_val)
lazy val sdq_rdy = !sdq_val.andR
lazy val sdq_enq = trackerIOsList.map( t =>
(t.alloc.iacq.should || t.alloc.iacq.matches) &&
t.inner.acquire.fire() &&
t.iacq().hasData()
).reduce(_||_)
lazy val sdqLoc = List.fill(nTransactors) {
DataQueueLocation(sdq_alloc_id, inStoreQueue).asUInt
}
/*
doInputRoutingWithAllocation(
in = io.inner.acquire,
outs = trackerList.map(_.io.inner.acquire),
allocs = trackerList.map(_.io.alloc._iacq),
dataOverride = Some(sdqLoc),
allocOverride = Some(sdq_rdy && !irel_vs_iacq_conflict))
*/
// Queue to store impending Voluntary Release data
lazy val voluntary = io.irel().isVoluntary()
lazy val vwbdq_enq = io.inner.release.fire() && voluntary && io.irel().hasData()
lazy val (rel_data_cnt, rel_data_done) = Counter(vwbdq_enq, innerDataBeats) //TODO Zero width
lazy val vwbdq = Reg(Vec(innerDataBeats, io.irel().data)) //TODO Assumes nReleaseTransactors == 1
lazy val vwbqLoc = (0 until nTransactors).map(i =>
(DataQueueLocation(rel_data_cnt,
(if(i < nReleaseTransactors) inVolWBQueue
else inClientReleaseQueue)).asUInt))
/*
doInputRoutingWithAllocation(
io.inner.release,
trackerList.map(_.io.inner.release),
trackerList.map(_.io.matches.irel),
trackerList.map(_.io.alloc.irel),
Some(vwbqLoc))
*/
val outer_arb: ClientTileLinkIOArbiter
lazy val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.data)
/*
val outer_arb = Module(new ClientTileLinkIOArbiter(trackerList.size)
(usingStoreDataQueue.alterPartial({ case TLId => p(OuterTLId) })))
outer_arb.io.in <> trackerList
*/
// Get the pending data out of the store data queue
lazy val is_in_sdq = outer_data_ptr.loc === inStoreQueue
lazy val free_sdq = io.outer.acquire.fire() &&
io.outer.acquire.bits.hasData() &&
outer_data_ptr.loc === inStoreQueue
/*
io.outer <> outer_arb.io.out
io.outer.acquire.bits.data := MuxLookup(outer_data_ptr.loc, io.irel().data, Array(
inStoreQueue -> sdq(outer_data_ptr.idx),
inVolWBQueue -> vwbdq(outer_data_ptr.idx)))
*/
// Enqueue SDQ data
def sdqEnqueue() {
when (sdq_enq) { sdq(sdq_alloc_id) := io.iacq().data }
when(vwbdq_enq) { vwbdq(rel_data_cnt) := io.irel().data }
}
// Update SDQ valid bits
def sdqUpdate() {
when (io.outer.acquire.valid || sdq_enq) {
sdq_val := sdq_val & ~(UIntToOH(outer_data_ptr.idx) & Fill(sdqDepth, free_sdq)) |
PriorityEncoderOH(~sdq_val(sdqDepth-1,0)) & Fill(sdqDepth, sdq_enq)
}
}
}
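// A plain-Scala sketch of the DataQueueLocation encoding used above
// (illustrative only; dqIdxBits is assumed to be 8 here, and the bit placement
// is arbitrary for the sketch): the data field of an outgoing message is
// overloaded to carry a queue index plus a tag naming which of the three
// queues actually holds the beat.
object DataQueueLocSketch extends App {
  val dqIdxBits = 8
  val (inStoreQueue, inVolWBQueue, inClientReleaseQueue) = (0, 1, 2)
  def pack(idx: Int, loc: Int): Int = (loc << dqIdxBits) | idx
  def loc(x: Int): Int = x >> dqIdxBits
  def idx(x: Int): Int = x & ((1 << dqIdxBits) - 1)
  val ptr = pack(idx = 5, loc = inVolWBQueue)
  assert(loc(ptr) == inVolWBQueue && idx(ptr) == 5)
}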

View File

@ -0,0 +1,654 @@
// See LICENSE for license details.
package uncore.agents
import Chisel._
import uncore.coherence._
import uncore.tilelink._
import uncore.util._
import junctions._
import cde.{Field, Parameters}
import scala.math.max
case object EnableL2Logging extends Field[Boolean]
class TrackerAllocation extends Bundle {
val matches = Bool(OUTPUT)
val can = Bool(OUTPUT)
val should = Bool(INPUT)
}
class TrackerAllocationIO(implicit val p: Parameters)
extends ParameterizedBundle()(p)
with HasCacheBlockAddress {
val iacq = new TrackerAllocation
val irel = new TrackerAllocation
val oprb = new TrackerAllocation
val idle = Bool(OUTPUT)
override val addr_block = UInt(OUTPUT, tlBlockAddrBits)
}
trait HasTrackerAllocationIO extends Bundle {
implicit val p: Parameters
val alloc = new TrackerAllocationIO
}
class ManagerXactTrackerIO(implicit p: Parameters) extends ManagerTLIO()(p)
with HasTrackerAllocationIO
class HierarchicalXactTrackerIO(implicit p: Parameters) extends HierarchicalTLIO()(p)
with HasTrackerAllocationIO
abstract class XactTracker(implicit p: Parameters) extends CoherenceAgentModule()(p)
with HasXactTrackerStates
with HasPendingBitHelpers {
override val s_idle :: s_meta_read :: s_meta_resp :: s_wb_req :: s_wb_resp :: s_inner_probe :: s_outer_acquire :: s_busy :: s_meta_write :: Nil = Enum(UInt(), 9)
val state = Reg(init=s_idle)
def quiesce(next: UInt = s_idle)(restore: => Unit) {
all_pending_done := !scoreboard.foldLeft(Bool(false))(_||_)
when(state === s_busy && all_pending_done) {
state := next
restore
}
}
def pinAllReadyValidLow[T <: Data](b: Bundle) {
b.elements.foreach {
_._2 match {
case d: DecoupledIO[_] =>
if(d.ready.dir == OUTPUT) d.ready := Bool(false)
else if(d.valid.dir == OUTPUT) d.valid := Bool(false)
case v: ValidIO[_] => if(v.valid.dir == OUTPUT) v.valid := Bool(false)
case b: Bundle => pinAllReadyValidLow(b)
case _ =>
}
}
}
}
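// A plain-Scala model of the scoreboard/quiesce idiom defined above
// (illustrative only): each trait appends its outstanding-work conditions to
// the scoreboard, and the tracker may leave s_busy only once every entry has
// drained.
object ScoreboardSketch extends App {
  val scoreboard = scala.collection.mutable.ListBuffer.empty[Boolean]
  scoreboard += true    // e.g. pending_ignt still outstanding
  scoreboard += false   // e.g. ognt_counter.pending already drained
  def allPendingDone: Boolean = !scoreboard.foldLeft(false)(_ || _)
  assert(!allPendingDone)
  scoreboard(0) = false // the last piece of work completes
  assert(allPendingDone)
}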
trait HasXactTrackerStates {
def state: UInt
def s_idle: UInt = UInt(0)
def s_meta_read: UInt
def s_meta_resp: UInt
def s_wb_req: UInt
def s_wb_resp: UInt
def s_inner_probe: UInt
def s_outer_acquire: UInt
def s_busy: UInt
def s_meta_write: UInt
}
trait HasPendingBitHelpers extends HasDataBeatCounters {
val scoreboard = scala.collection.mutable.ListBuffer.empty[Bool]
val all_pending_done = Wire(Bool())
def addPendingBitWhenBeat[T <: HasBeat](inc: Bool, in: T): UInt =
Fill(in.tlDataBeats, inc) & UIntToOH(in.addr_beat)
def dropPendingBitWhenBeat[T <: HasBeat](dec: Bool, in: T): UInt =
~Fill(in.tlDataBeats, dec) | ~UIntToOH(in.addr_beat)
def addPendingBitWhenId[T <: HasClientId](inc: Bool, in: T): UInt =
Fill(in.tlNCachingClients, inc) & UIntToOH(in.client_id)
def dropPendingBitWhenId[T <: HasClientId](dec: Bool, in: T): UInt =
~Fill(in.tlNCachingClients, dec) | ~UIntToOH(in.client_id)
def addPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T], inc: Bool = Bool(true)): UInt =
addPendingBitWhenBeat(in.fire() && in.bits.hasData() && inc, in.bits)
def addPendingBitWhenBeatHasDataAndAllocs(
in: DecoupledIO[AcquireFromSrc],
alloc_override: Bool = Bool(false)): UInt =
addPendingBitWhenBeatHasData(in, in.bits.allocate() || alloc_override)
def addPendingBitWhenBeatNeedsRead(in: DecoupledIO[AcquireFromSrc],
always: Bool = Bool(true), unless: Bool = Bool(false)): UInt = {
val a = in.bits
val needs_read = !unless && (a.isGet() || a.isAtomic() || a.hasPartialWritemask()) || always
addPendingBitWhenBeat(in.fire() && needs_read, a)
}
def addPendingBitWhenBeatHasPartialWritemask(in: DecoupledIO[AcquireFromSrc]): UInt =
addPendingBitWhenBeat(in.fire() && in.bits.hasPartialWritemask(), in.bits)
def addPendingBitsFromAcquire(a: SecondaryMissInfo): UInt =
Mux(a.hasMultibeatData(), Fill(a.tlDataBeats, UInt(1, 1)), UIntToOH(a.addr_beat))
def dropPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T]): UInt =
dropPendingBitWhenBeat(in.fire() && in.bits.hasData(), in.bits)
def dropPendingBitAtDest[T <: HasId](in: DecoupledIO[T]): UInt =
dropPendingBitWhenId(in.fire(), in.bits)
def dropPendingBitAtDestWhenVoluntary[T <: HasId with MightBeVoluntary](in: DecoupledIO[T]): UInt =
dropPendingBitWhenId(in.fire() && in.bits.isVoluntary(), in.bits)
def addPendingBitAtSrc[T <: HasId](in: DecoupledIO[T]): UInt =
addPendingBitWhenId(in.fire(), in.bits)
def addPendingBitAtSrcWhenVoluntary[T <: HasId with MightBeVoluntary](in: DecoupledIO[T]): UInt =
addPendingBitWhenId(in.fire() && in.bits.isVoluntary(), in.bits)
def addOtherBits(en: Bool, nBits: Int): UInt =
Mux(en, Cat(Fill(nBits - 1, UInt(1, 1)), UInt(0, 1)), UInt(0, nBits))
def addPendingBitsOnFirstBeat(in: DecoupledIO[Acquire]): UInt =
addOtherBits(in.fire() &&
in.bits.hasMultibeatData() &&
in.bits.addr_beat === UInt(0),
in.bits.tlDataBeats)
def dropPendingBitsOnFirstBeat(in: DecoupledIO[Acquire]): UInt =
~addPendingBitsOnFirstBeat(in)
}
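// A plain-Scala model of the add/drop pending-bit idiom above (illustrative
// only): each helper yields a full-width mask, and a register update ORs the
// adds and ANDs the drops in a single assignment, exactly as the trackers do.
object PendingBitSketch extends App {
  var pending = 0
  def addWhenBeat(fire: Boolean, beat: Int): Int  = if (fire) 1 << beat else 0
  def dropWhenBeat(fire: Boolean, beat: Int): Int = if (fire) ~(1 << beat) else -1
  // A data beat arrives on beat 2 while beat 0 is retired in the same cycle:
  pending = (pending | addWhenBeat(fire = true, beat = 2)) &
            dropWhenBeat(fire = true, beat = 0)
  assert(pending == (1 << 2))
}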
trait HasDataBuffer extends HasCoherenceAgentParameters {
val data_buffer = Reg(init=Vec.fill(innerDataBeats)(UInt(0, width = innerDataBits)))
type TLDataBundle = TLBundle with HasTileLinkData with HasTileLinkBeatId
def initDataInner[T <: Acquire](in: DecoupledIO[T], alloc: Bool) {
when(in.fire() && in.bits.hasData() && alloc) {
data_buffer(in.bits.addr_beat) := in.bits.data
}
}
// TODO: provide func for accessing when innerDataBeats =/= outerDataBeats or internalDataBeats
def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
data_buffer(beat) := incoming
}
def mergeDataInner[T <: TLDataBundle](in: DecoupledIO[T]) {
when(in.fire() && in.bits.hasData()) {
mergeData(innerDataBits)(in.bits.addr_beat, in.bits.data)
}
}
def mergeDataOuter[T <: TLDataBundle](in: DecoupledIO[T]) {
when(in.fire() && in.bits.hasData()) {
mergeData(outerDataBits)(in.bits.addr_beat, in.bits.data)
}
}
}
trait HasByteWriteMaskBuffer extends HasDataBuffer {
val wmask_buffer = Reg(init=Vec.fill(innerDataBeats)(UInt(0, width = innerWriteMaskBits)))
val data_valid = Vec(wmask_buffer.map(wmask => wmask.andR))
override def initDataInner[T <: Acquire](in: DecoupledIO[T], alloc: Bool) {
when(in.fire() && in.bits.hasData() && alloc) {
val beat = in.bits.addr_beat
val full = FillInterleaved(8, in.bits.wmask())
data_buffer(beat) := (~full & data_buffer(beat)) | (full & in.bits.data)
wmask_buffer(beat) := in.bits.wmask() | wmask_buffer(beat) // assumes wmask_buffer is zeroed
}
}
override def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
val old_data = incoming // Refilled, written back, or de-cached data
val new_data = data_buffer(beat) // Newly Put data is already in the buffer
val wmask = FillInterleaved(8, wmask_buffer(beat))
data_buffer(beat) := (~wmask & old_data) | (wmask & new_data)
wmask_buffer(beat) := ~UInt(0, innerWriteMaskBits)
}
def clearWmaskBuffer() {
wmask_buffer.foreach { w => w := UInt(0) }
}
}
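// A plain-Scala sketch of the byte-granular merge performed by mergeData above
// (illustrative only, 4-byte beats): buffered Put bytes win wherever the write
// mask is set, and refill data fills in the remaining bytes.
object WmaskMergeSketch extends App {
  def fillInterleaved8(wmask: Int, nBytes: Int): Long =
    (0 until nBytes).map(i => if ((wmask & (1 << i)) != 0) 0xFFL << (8 * i) else 0L).fold(0L)(_ | _)
  val old_data = 0x11223344L // refilled, written back, or de-cached data
  val new_data = 0xAABBCCDDL // Put data already sitting in the buffer
  val wmask    = 0x5         // bytes 0 and 2 were written by the Put
  val full     = fillInterleaved8(wmask, nBytes = 4)
  val merged   = (~full & old_data) | (full & new_data)
  assert(merged == 0x11BB33DDL)
}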
trait HasBlockAddressBuffer extends HasCoherenceAgentParameters {
val xact_addr_block = Reg(init = UInt(0, width = blockAddrBits))
}
trait HasAcquireMetadataBuffer extends HasBlockAddressBuffer {
val xact_allocate = Reg{ Bool() }
val xact_amo_shift_bytes = Reg{ UInt() }
val xact_op_code = Reg{ UInt() }
val xact_addr_byte = Reg{ UInt() }
val xact_op_size = Reg{ UInt() }
val xact_addr_beat = Wire(UInt())
val xact_iacq = Wire(new SecondaryMissInfo)
}
trait HasVoluntaryReleaseMetadataBuffer extends HasBlockAddressBuffer
with HasPendingBitHelpers
with HasXactTrackerStates {
def io: HierarchicalXactTrackerIO
val xact_vol_ir_r_type = Reg{ UInt() }
val xact_vol_ir_src = Reg{ UInt() }
val xact_vol_ir_client_xact_id = Reg{ UInt() }
def xact_vol_irel = Release(
src = xact_vol_ir_src,
voluntary = Bool(true),
r_type = xact_vol_ir_r_type,
client_xact_id = xact_vol_ir_client_xact_id,
addr_block = xact_addr_block)
(p.alterPartial({ case TLId => p(InnerTLId) }))
}
trait AcceptsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer {
def inner_coh: ManagerMetadata
val pending_irel_data = Reg(init=Bits(0, width = innerDataBeats))
val vol_ignt_counter = Wire(new TwoWayBeatCounterStatus)
def irel_can_merge: Bool
def irel_same_xact: Bool
def irel_is_allocating: Bool = state === s_idle && io.alloc.irel.should && io.inner.release.valid
def irel_is_merging: Bool = (irel_can_merge || irel_same_xact) && io.inner.release.valid
def innerRelease(block_vol_ignt: Bool = Bool(false), next: UInt = s_busy) {
connectTwoWayBeatCounters(
status = vol_ignt_counter,
up = io.inner.release,
down = io.inner.grant,
trackUp = (r: Release) => {
Mux(state === s_idle, io.alloc.irel.should, io.alloc.irel.matches) && r.isVoluntary() && r.requiresAck()
},
trackDown = (g: Grant) => (state =/= s_idle) && g.isVoluntary())
when(irel_is_allocating) {
xact_addr_block := io.irel().addr_block
// Set all of them to pending in the beginning as a precaution
// If it turns out we don't need some or all of the beats, they will
// be overridden below
pending_irel_data := ~UInt(0, innerDataBeats)
state := next
}
val irel_fire = (irel_is_allocating || irel_is_merging) && io.inner.release.ready
when (irel_fire) {
when (io.irel().first()) {
when (io.irel().isVoluntary()) {
xact_vol_ir_r_type := io.irel().r_type
xact_vol_ir_src := io.irel().client_id
xact_vol_ir_client_xact_id := io.irel().client_xact_id
}
// If this release has data, set all the pending bits except the first.
// Otherwise, clear all the pending bits
pending_irel_data := Mux(io.irel().hasMultibeatData(),
dropPendingBitWhenBeatHasData(io.inner.release),
UInt(0))
} .otherwise {
pending_irel_data := (pending_irel_data & dropPendingBitWhenBeatHasData(io.inner.release))
}
if (p(EnableL2Logging)) {
when (io.irel().hasData()) {
printf("[release] addr_block=%x addr_beat=%d data=%x\n",
io.irel().addr_block, io.irel().addr_beat, io.irel().data)
}
}
}
io.inner.grant.valid := state.isOneOf(s_wb_req, s_wb_resp, s_inner_probe, s_busy) &&
vol_ignt_counter.pending &&
!(pending_irel_data.orR || block_vol_ignt)
io.inner.grant.bits := inner_coh.makeGrant(xact_vol_irel)
scoreboard += (pending_irel_data.orR, vol_ignt_counter.pending)
}
}
trait EmitsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer {
val pending_orel_send = Reg(init=Bool(false))
val pending_orel_data = Reg(init=Bits(0, width = innerDataBeats))
val vol_ognt_counter = Wire(new TwoWayBeatCounterStatus)
val pending_orel = pending_orel_send || pending_orel_data.orR || vol_ognt_counter.pending
val sending_orel = Reg(init = Bool(false))
// Block acceptance of inner releases if we have already started sending
// outer releases, but have not yet sent out the beat corresponding to the
// inner release. This function must be included in io.inner.release.ready
// if it is possible to start accepting a new inner release as the previous
// outer release is still being sent. DO NOT include this in the
// io.inner.release.ready if the releases are not buffered
// (i.e. io.inner.release and io.outer.release are combinationally linked).
def blockInnerRelease(rel: ReleaseMetadata = io.irel()): Bool = {
val waiting_to_send = sending_orel && pending_orel_data(rel.addr_beat)
val sending_now = io.outer.release.fire() && rel.addr_beat === io.orel().addr_beat
rel.hasData() && (waiting_to_send || sending_now)
}
def outerRelease(
coh: ClientMetadata,
buffering: Bool = Bool(true),
data: UInt = io.irel().data,
add_pending_data_bits: UInt = UInt(0),
add_pending_send_bit: Bool = Bool(false),
block_orel: Bool = Bool(false)) {
when (state =/= s_idle || io.alloc.irel.should) {
pending_orel_data := (pending_orel_data |
addPendingBitWhenBeatHasData(io.inner.release) |
add_pending_data_bits) &
dropPendingBitWhenBeatHasData(io.outer.release)
}
when (add_pending_send_bit) { pending_orel_send := Bool(true) }
when (io.outer.release.fire()) {
when (io.outer.release.bits.first()) { sending_orel := Bool(true) }
when (io.outer.release.bits.last()) { sending_orel := Bool(false) }
pending_orel_send := Bool(false)
}
connectTwoWayBeatCounters(
status = vol_ognt_counter,
up = io.outer.release,
down = io.outer.grant,
trackUp = (r: Release) => r.isVoluntary() && r.requiresAck(),
trackDown = (g: Grant) => g.isVoluntary())
io.outer.release.valid := !block_orel && Mux(buffering,
(state === s_busy) && Mux(io.orel().hasData(),
pending_orel_data(vol_ognt_counter.up.idx),
pending_orel_send),
// only writebacks need to be forwarded to the outer interface
state =/= s_idle && io.alloc.irel.matches &&
io.irel().hasData() && io.inner.release.valid)
io.outer.release.bits := coh.makeVoluntaryWriteback(
client_xact_id = UInt(0), // TODO was tracker id, but not needed?
addr_block = xact_addr_block,
addr_beat = vol_ognt_counter.up.idx,
data = data)
when (vol_ognt_counter.pending) { io.outer.grant.ready := Bool(true) }
scoreboard += (pending_orel, vol_ognt_counter.pending)
}
}
trait EmitsInnerProbes extends HasBlockAddressBuffer
with HasXactTrackerStates
with HasPendingBitHelpers {
def io: HierarchicalXactTrackerIO
val needs_probes = (innerNCachingClients > 0)
val pending_iprbs = Reg(UInt(width = max(innerNCachingClients, 1)))
val curr_probe_dst = PriorityEncoder(pending_iprbs)
def full_representation: UInt
def initializeProbes() {
if (needs_probes)
pending_iprbs := full_representation & ~io.incoherent.asUInt
else
pending_iprbs := UInt(0)
}
def irel_same_xact = io.irel().conflicts(xact_addr_block) &&
!io.irel().isVoluntary() &&
state === s_inner_probe
def innerProbe(prb: Probe, next: UInt) {
if (needs_probes) {
val irel_counter = Wire(new TwoWayBeatCounterStatus)
pending_iprbs := pending_iprbs & dropPendingBitAtDest(io.inner.probe)
io.inner.probe.valid := state === s_inner_probe && pending_iprbs.orR
io.inner.probe.bits := prb
connectTwoWayBeatCounters(
status = irel_counter,
up = io.inner.probe,
down = io.inner.release,
max = innerNCachingClients,
trackDown = (r: Release) => (state =/= s_idle) && !r.isVoluntary())
when(state === s_inner_probe && !(pending_iprbs.orR || irel_counter.pending)) {
state := next
}
} else {
when (state === s_inner_probe) { state := next }
}
// N.B. no pending bits are added to the scoreboard because they are all handled in s_inner_probe
}
}
trait RoutesInParent extends HasBlockAddressBuffer
with HasXactTrackerStates {
def io: HierarchicalXactTrackerIO
type AddrComparison = HasCacheBlockAddress => Bool
def exactAddrMatch(a: HasCacheBlockAddress): Bool = a.conflicts(xact_addr_block)
def routeInParent(iacqMatches: AddrComparison = exactAddrMatch,
irelMatches: AddrComparison = exactAddrMatch,
oprbMatches: AddrComparison = exactAddrMatch,
iacqCanAlloc: Bool = Bool(false),
irelCanAlloc: Bool = Bool(false),
oprbCanAlloc: Bool = Bool(false)) {
io.alloc.iacq.matches := (state =/= s_idle) && iacqMatches(io.iacq())
io.alloc.irel.matches := (state =/= s_idle) && irelMatches(io.irel())
io.alloc.oprb.matches := (state =/= s_idle) && oprbMatches(io.oprb())
io.alloc.iacq.can := state === s_idle && iacqCanAlloc
io.alloc.irel.can := state === s_idle && irelCanAlloc
io.alloc.oprb.can := state === s_idle && oprbCanAlloc
io.alloc.addr_block := xact_addr_block
io.alloc.idle := state === s_idle
}
}
trait AcceptsInnerAcquires extends HasAcquireMetadataBuffer
with AcceptsVoluntaryReleases
with HasXactTrackerStates
with HasPendingBitHelpers {
def io: HierarchicalXactTrackerIO
def nSecondaryMisses: Int
def alwaysWriteFullBeat: Boolean
def inner_coh: ManagerMetadata
def trackerId: Int
// Secondary miss queue holds transaction metadata used to make grants
lazy val ignt_q = Module(new Queue(
new SecondaryMissInfo()(p.alterPartial({ case TLId => p(InnerTLId) })),
1 + nSecondaryMisses))
val pending_ignt = Wire(Bool())
val ignt_data_idx = Wire(UInt())
val ignt_data_done = Wire(Bool())
val ifin_counter = Wire(new TwoWayBeatCounterStatus)
val pending_put_data = Reg(init=Bits(0, width = innerDataBeats))
val pending_ignt_data = Reg(init=Bits(0, width = innerDataBeats))
def iacq_same_xact: Bool =
(xact_iacq.client_xact_id === io.iacq().client_xact_id) &&
(xact_iacq.client_id === io.iacq().client_id) &&
pending_ignt
def iacq_same_xact_multibeat = iacq_same_xact && io.iacq().hasMultibeatData()
def iacq_can_merge: Bool
def iacq_is_allocating: Bool = state === s_idle && io.alloc.iacq.should && io.inner.acquire.valid
def iacq_is_merging: Bool = (iacq_can_merge || iacq_same_xact) && io.inner.acquire.valid
def innerAcquire(can_alloc: Bool, next: UInt) {
val iacq_matches_head = iacq_same_xact && xact_iacq.addr_beat === io.iacq().addr_beat
// Enqueue some metadata information that we'll use to make coherence updates with later
ignt_q.io.enq.valid := iacq_is_allocating ||
(!iacq_matches_head && pending_ignt &&
io.inner.acquire.fire() && io.iacq().first())
ignt_q.io.enq.bits := io.iacq()
// Use the outputs of the queue to make further messages
xact_iacq := Mux(ignt_q.io.deq.valid, ignt_q.io.deq.bits, ignt_q.io.enq.bits)
xact_addr_beat := xact_iacq.addr_beat
pending_ignt := ignt_q.io.count > UInt(0)
// Track whether any beats are missing from a PutBlock
when (state =/= s_idle || io.alloc.iacq.should) {
pending_put_data := (pending_put_data &
dropPendingBitWhenBeatHasData(io.inner.acquire)) |
addPendingBitsOnFirstBeat(io.inner.acquire)
}
// Initialize transaction metadata for accepted Acquire
when(iacq_is_allocating) {
xact_addr_block := io.iacq().addr_block
xact_allocate := io.iacq().allocate() && can_alloc
xact_amo_shift_bytes := io.iacq().amo_shift_bytes()
xact_op_code := io.iacq().op_code()
xact_addr_byte := io.iacq().addr_byte()
xact_op_size := io.iacq().op_size()
// Make sure to collect all data from a PutBlock
pending_put_data := Mux(
io.iacq().isBuiltInType(Acquire.putBlockType),
dropPendingBitWhenBeatHasData(io.inner.acquire),
UInt(0))
pending_ignt_data := UInt(0)
state := next
}
scoreboard += (pending_put_data.orR)
}
def innerGrant(
data: UInt = io.ognt().data,
external_pending: Bool = Bool(false),
buffering: Bool = Bool(true),
add_pending_bits: UInt = UInt(0)) {
// Track the number of outstanding inner.finishes
connectTwoWayBeatCounters(
status = ifin_counter,
up = io.inner.grant,
down = io.inner.finish,
max = nSecondaryMisses,
trackUp = (g: Grant) => g.requiresAck())
// Track which beats are ready for response
when(!iacq_is_allocating) {
pending_ignt_data := pending_ignt_data |
addPendingBitWhenBeatHasData(io.inner.release) |
addPendingBitWhenBeatHasData(io.outer.grant) |
add_pending_bits
}
if (p(EnableL2Logging)) {
when (io.inner.grant.fire() && io.ignt().hasData()) {
printf("[get] addr_block=%x addr_beat=%d data=%x\n",
xact_addr_block, io.ignt().addr_beat, io.ignt().data)
}
}
// Have we finished receiving the complete inner acquire transaction?
val iacq_finished = !(state === s_idle ||
state === s_meta_read ||
pending_put_data.orR)
val ignt_from_iacq = inner_coh.makeGrant(
sec = ignt_q.io.deq.bits,
manager_xact_id = UInt(trackerId),
data = data)
// Make the Grant message using the data stored in the secondary miss queue
val (cnt, done) = connectOutgoingDataBeatCounter(io.inner.grant, ignt_q.io.deq.bits.addr_beat)
ignt_data_idx := cnt
ignt_data_done := done
ignt_q.io.deq.ready := Bool(false)
when(!vol_ignt_counter.pending) {
ignt_q.io.deq.ready := ignt_data_done
io.inner.grant.bits := ignt_from_iacq
io.inner.grant.bits.addr_beat := ignt_data_idx // override based on outgoing counter
when (state === s_busy && pending_ignt) {
io.inner.grant.valid := !external_pending &&
Mux(io.ignt().hasData(),
Mux(buffering,
pending_ignt_data(ignt_data_idx),
io.outer.grant.valid),
iacq_finished)
}
}
// We must wait for as many Finishes as we sent Grants
io.inner.finish.ready := state === s_busy
scoreboard += (pending_ignt, ifin_counter.pending)
}
}
trait EmitsOuterAcquires extends AcceptsInnerAcquires {
val ognt_counter = Wire(new TwoWayBeatCounterStatus)
// Handle misses or coherence permission upgrades by initiating a new transaction in the outer memory:
//
// If we're allocating in this cache, we can use the current metadata
// to make an appropriate custom Acquire, otherwise we copy over the
// built-in Acquire from the inner TL to the outer TL
def outerAcquire(
caching: Bool,
coh: ClientMetadata,
block_outer_acquire: Bool = Bool(false),
buffering: Bool = Bool(true),
data: UInt = io.iacq().data,
wmask: UInt = io.iacq().wmask(),
next: UInt = s_busy) {
// Tracks outstanding Acquires, waiting for their matching Grant.
connectTwoWayBeatCounters(
status = ognt_counter,
up = io.outer.acquire,
down = io.outer.grant,
beat = xact_addr_beat,
trackDown = (g: Grant) => !g.isVoluntary())
io.outer.acquire.valid :=
state === s_outer_acquire && !block_outer_acquire &&
(xact_allocate ||
Mux(buffering,
!pending_put_data(ognt_counter.up.idx),
// If not buffering, we should only send an outer acquire if
// the ignt_q is not empty (pending_ignt) and the enqueued
// transaction does not have data or we are receiving the
// inner acquire and it is the same transaction as the one enqueued.
pending_ignt && (!xact_iacq.hasData() ||
(io.inner.acquire.valid && iacq_same_xact))))
io.outer.acquire.bits :=
Mux(caching,
coh.makeAcquire(
op_code = xact_op_code,
client_xact_id = UInt(0),
addr_block = xact_addr_block),
BuiltInAcquireBuilder(
a_type = xact_iacq.a_type,
client_xact_id = UInt(0),
addr_block = xact_addr_block,
addr_beat = ognt_counter.up.idx,
data = data,
addr_byte = xact_addr_byte,
operand_size = xact_op_size,
opcode = xact_op_code,
wmask = wmask,
alloc = Bool(false))
(p.alterPartial({ case TLId => p(OuterTLId)})))
when(state === s_outer_acquire && ognt_counter.up.done) { state := next }
when (ognt_counter.pending) { io.outer.grant.ready := Bool(true) }
scoreboard += ognt_counter.pending
}
}
abstract class VoluntaryReleaseTracker(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p)
with AcceptsVoluntaryReleases
with RoutesInParent {
def irel_can_merge = Bool(false)
def irel_same_xact = io.irel().conflicts(xact_addr_block) &&
io.irel().isVoluntary() &&
pending_irel_data.orR
}
abstract class AcquireTracker(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p)
with AcceptsInnerAcquires
with EmitsOuterAcquires
with EmitsInnerProbes
with RoutesInParent {
}

View File

@ -0,0 +1,43 @@
// See LICENSE for license details.
package uncore.coherence
import Chisel._
// This class encapsulates transformations on different directory information
// storage formats
abstract class DirectoryRepresentation(val width: Int) {
def pop(prev: UInt, id: UInt): UInt
def push(prev: UInt, id: UInt): UInt
def flush: UInt
def none(s: UInt): Bool
def one(s: UInt): Bool
def count(s: UInt): UInt
def next(s: UInt): UInt
def full(s: UInt): UInt
}
abstract trait HasDirectoryRepresentation {
val dir: DirectoryRepresentation
}
class NullRepresentation(nClients: Int) extends DirectoryRepresentation(1) {
def pop(prev: UInt, id: UInt) = UInt(0)
def push(prev: UInt, id: UInt) = UInt(0)
def flush = UInt(0)
def none(s: UInt) = Bool(false)
def one(s: UInt) = Bool(false)
def count(s: UInt) = UInt(nClients)
def next(s: UInt) = UInt(0)
def full(s: UInt) = SInt(-1, width = nClients).asUInt
}
class FullRepresentation(nClients: Int) extends DirectoryRepresentation(nClients) {
def pop(prev: UInt, id: UInt) = prev & ~UIntToOH(id)
def push(prev: UInt, id: UInt) = prev | UIntToOH(id)
def flush = UInt(0, width = width)
def none(s: UInt) = s === UInt(0)
def one(s: UInt) = PopCount(s) === UInt(1)
def count(s: UInt) = PopCount(s)
def next(s: UInt) = PriorityEncoder(s)
def full(s: UInt) = s
}
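// A plain-Scala model of FullRepresentation's sharer bookkeeping above
// (illustrative only): the directory is an N-hot bitvector, pushed on Grant,
// popped on Release, with count and next derived by popcount and priority
// encoding.
object DirectorySketch extends App {
  var sharers = 0 // models flush
  def push(id: Int): Unit = sharers |= (1 << id)
  def pop(id: Int): Unit  = sharers &= ~(1 << id)
  def count: Int = Integer.bitCount(sharers)
  def next: Int  = Integer.numberOfTrailingZeros(sharers) // models PriorityEncoder
  push(1); push(3)
  assert(count == 2 && next == 1)
  pop(1)
  assert(count == 1 && next == 3)
}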

View File

@ -0,0 +1,344 @@
// See LICENSE for license details.
package uncore.coherence
import Chisel._
import uncore.tilelink._
import uncore.constants._
import cde.{Parameters, Field}
/** Identifies the TLId of the inner network in a hierarchical cache controller */
case object InnerTLId extends Field[String]
/** Identifies the TLId of the outer network in a hierarchical cache controller */
case object OuterTLId extends Field[String]
/** Base class to represent coherence information in clients and managers */
abstract class CoherenceMetadata(implicit p: Parameters) extends TLBundle()(p) {
val co = tlCoh
}
/** Stores the client-side coherence information,
* such as permissions on the data and whether the data is dirty.
* Its API can be used to make TileLink messages in response to
* memory operations or [[uncore.Probe]] messages.
*/
class ClientMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) {
/** Actual state information stored in this bundle */
val state = UInt(width = co.clientStateWidth)
/** Metadata equality */
def ===(rhs: ClientMetadata): Bool = this.state === rhs.state
def =/=(rhs: ClientMetadata): Bool = !this.===(rhs)
/** Is the block's data present in this cache */
def isValid(dummy: Int = 0): Bool = co.isValid(this)
/** Does this cache have permissions on this block sufficient to perform op */
def isHit(op_code: UInt): Bool = co.isHit(op_code, this)
/** Does this cache lack permissions on this block sufficient to perform op */
def isMiss(op_code: UInt): Bool = !co.isHit(op_code, this)
/** Does a secondary miss on the block require another Acquire message */
def requiresAcquireOnSecondaryMiss(first_op: UInt, second_op: UInt): Bool =
co.requiresAcquireOnSecondaryMiss(first_op, second_op, this)
/** Does op require a Release to be made to outer memory */
def requiresReleaseOnCacheControl(op_code: UInt): Bool =
co.requiresReleaseOnCacheControl(op_code: UInt, this)
/** Does an eviction require a Release to be made to outer memory */
def requiresVoluntaryWriteback(dummy: Int = 0): Bool =
co.requiresReleaseOnCacheControl(M_FLUSH, this)
/** Constructs an Acquire message based on this metadata and a memory operation
*
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
* @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
*/
def makeAcquire(
op_code: UInt,
client_xact_id: UInt,
addr_block: UInt): Acquire = {
Acquire(
is_builtin_type = Bool(false),
a_type = co.getAcquireType(op_code, this),
client_xact_id = client_xact_id,
addr_block = addr_block,
union = Cat(op_code, Bool(true)))(p)
}
/** Constructs a Release message based on this metadata on cache control op
*
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
* @param addr_beat sub-block address (which beat)
* @param data data being written back
*/
def makeVoluntaryRelease(
op_code: UInt,
client_xact_id: UInt,
addr_block: UInt,
addr_beat: UInt = UInt(0),
data: UInt = UInt(0)): Release =
Release(
voluntary = Bool(true),
r_type = co.getReleaseType(op_code, this),
client_xact_id = client_xact_id,
addr_block = addr_block,
addr_beat = addr_beat,
data = data)(p)
/** Constructs a Release message based on this metadata on an eviction
*
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
* @param addr_beat sub-block address (which beat)
* @param data data being written back
*/
def makeVoluntaryWriteback(
client_xact_id: UInt,
addr_block: UInt,
addr_beat: UInt = UInt(0),
data: UInt = UInt(0)): Release =
makeVoluntaryRelease(
op_code = M_FLUSH,
client_xact_id = client_xact_id,
addr_block = addr_block,
addr_beat = addr_beat,
data = data)
/** Constructs a Release message based on this metadata and a [[uncore.Probe]]
*
* @param prb the incoming [[uncore.Probe]]
* @param addr_beat sub-block address (which beat)
* @param data data being released
*/
def makeRelease(
prb: Probe,
addr_beat: UInt = UInt(0),
data: UInt = UInt(0)): Release =
Release(
voluntary = Bool(false),
r_type = co.getReleaseType(prb, this),
client_xact_id = UInt(0),
addr_block = prb.addr_block,
addr_beat = addr_beat,
data = data)(p)
/** New metadata after receiving a [[uncore.Grant]]
*
* @param incoming the incoming [[uncore.Grant]]
* @param pending the mem op that triggered this transaction
*/
def onGrant(incoming: Grant, pending: UInt): ClientMetadata =
co.clientMetadataOnGrant(incoming, pending, this)
/** New metadata after receiving a [[uncore.Probe]]
*
* @param incoming the incoming [[uncore.Probe]]
*/
def onProbe(incoming: Probe): ClientMetadata =
co.clientMetadataOnProbe(incoming, this)
/** New metadata after a op_code hits this block
*
* @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
*/
def onHit(op_code: UInt): ClientMetadata =
co.clientMetadataOnHit(op_code, this)
/** New metadata after op_code releases permissions on this block
*
* @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
*/
def onCacheControl(op_code: UInt): ClientMetadata =
co.clientMetadataOnCacheControl(op_code, this)
}
/** Factories for ClientMetadata, including on reset */
object ClientMetadata {
def apply(state: UInt)(implicit p: Parameters) = {
val meta = Wire(new ClientMetadata)
meta.state := state
meta
}
def onReset(implicit p: Parameters) = ClientMetadata(UInt(0))(p) // TODO: assumes clientInvalid === 0
}
/** Stores manager-side information about the status
* of a cache block, including whether it has any known sharers.
*
* Its API can be used to create [[uncore.Probe]] and [[uncore.Grant]] messages.
*/
class ManagerMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) {
// Currently no coherence policies assume manager-side state information
// val state = UInt(width = co.masterStateWidth) TODO: Fix 0-width wires in Chisel
/** The directory information for this block */
val sharers = UInt(width = co.dir.width)
/** Metadata equality */
def ===(rhs: ManagerMetadata): Bool = //this.state === rhs.state && TODO: Fix 0-width wires in Chisel
this.sharers === rhs.sharers
def =/=(rhs: ManagerMetadata): Bool = !this.===(rhs)
/** Converts the directory info into an N-hot sharer bitvector (i.e. full representation) */
def full(dummy: Int = 0): UInt = co.dir.full(this.sharers)
/** Does this [[uncore.Acquire]] require [[uncore.Probe Probes]] to be sent */
def requiresProbes(acq: HasAcquireType): Bool = co.requiresProbes(acq, this)
/** Does this memory op require [[uncore.Probe Probes]] to be sent */
def requiresProbes(op_code: UInt): Bool = co.requiresProbes(op_code, this)
/** Does an eviction require [[uncore.Probe Probes]] to be sent */
def requiresProbesOnVoluntaryWriteback(dummy: Int = 0): Bool =
co.requiresProbes(M_FLUSH, this)
/** Construct an appropriate [[uncore.ProbeToDst]] for a given [[uncore.Acquire]]
*
* @param dst Destination client id for this Probe
* @param acq Acquire message triggering this Probe
* @param addr_block address of the cache block being probed
*/
def makeProbe(dst: UInt, acq: HasAcquireType, addr_block: UInt): ProbeToDst =
Probe(dst, co.getProbeType(acq, this), addr_block)(p)
/** Construct an appropriate [[uncore.ProbeToDst]] for a given [[uncore.Acquire]]
*
* @param dst Destination client id for this Probe
* @param acq Acquire message triggering this Probe
*/
def makeProbe(dst: UInt, acq: AcquireMetadata): ProbeToDst =
Probe(dst, co.getProbeType(acq, this), acq.addr_block)(p)
/** Construct an appropriate [[uncore.ProbeToDst]] for a given mem op
*
* @param dst Destination client id for this Probe
* @param op_code memory operation triggering this Probe
* @param addr_block address of the cache block being probed
*/
def makeProbe(dst: UInt, op_code: UInt, addr_block: UInt): ProbeToDst =
Probe(dst, co.getProbeType(op_code, this), addr_block)(p)
/** Construct an appropriate [[uncore.ProbeToDst]] for an eviction
*
* @param dst Destination client id for this Probe
* @param addr_block address of the cache block being probed prior to eviction
*/
def makeProbeForVoluntaryWriteback(dst: UInt, addr_block: UInt): ProbeToDst =
makeProbe(dst, M_FLUSH, addr_block)
/** Construct an appropriate [[uncore.GrantToDst]] to acknowledge an [[uncore.Release]]
*
* @param rel Release message being acknowledged by this Grant
*/
def makeGrant(rel: ReleaseMetadata with HasClientId): GrantToDst =
Grant(
dst = rel.client_id,
is_builtin_type = Bool(true),
g_type = Grant.voluntaryAckType,
client_xact_id = rel.client_xact_id,
manager_xact_id = UInt(0))(p)
/** Construct an appropriate [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]]
*
* May contain single or multiple beats of data, or just be a permissions upgrade.
*
* @param acq Acquire message being responded to by this Grant
* @param manager_xact_id manager's transaction id
* @param addr_beat beat id of the data
* @param data data being refilled to the original requestor
*/
def makeGrant(
acq: AcquireMetadata with HasClientId,
manager_xact_id: UInt,
addr_beat: UInt = UInt(0),
data: UInt = UInt(0)): GrantToDst =
Grant(
dst = acq.client_id,
is_builtin_type = acq.isBuiltInType(),
g_type = co.getGrantType(acq, this),
client_xact_id = acq.client_xact_id,
manager_xact_id = manager_xact_id,
addr_beat = addr_beat,
data = data)(p)
/** Construct an [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]] with some overrides
*
* Used to respond to secondary misses merged into this transaction.
* May contain single or multiple beats of data.
*
* @param sec Secondary miss info
* @param manager_xact_id manager's transaction id
* @param data data being refilled to the original requestor
*/
def makeGrant(
sec: SecondaryMissInfo,
manager_xact_id: UInt,
data: UInt): GrantToDst = {
Grant(
dst = sec.client_id,
is_builtin_type = sec.isBuiltInType(),
g_type = co.getGrantType(sec, this),
client_xact_id = sec.client_xact_id,
manager_xact_id = manager_xact_id,
addr_beat = sec.addr_beat,
data = data)(p)
}
/** New metadata after receiving a [[uncore.ReleaseFromSrc]]
*
* @param incoming the incoming [[uncore.ReleaseFromSrc]]
*/
def onRelease(incoming: ReleaseMetadata with HasClientId): ManagerMetadata =
co.managerMetadataOnRelease(incoming, incoming.client_id, this)
/** New metadata after sending a [[uncore.GrantToDst]]
*
* @param outgoing the outgoing [[uncore.GrantToDst]]
*/
def onGrant(outgoing: GrantMetadata with HasClientId): ManagerMetadata =
co.managerMetadataOnGrant(outgoing, outgoing.client_id, this)
}
/** Factories for ManagerMetadata, including on reset */
object ManagerMetadata {
def apply(sharers: UInt, state: UInt = UInt(width = 0))(implicit p: Parameters) = {
val meta = Wire(new ManagerMetadata)
//meta.state := state TODO: Fix 0-width wires in Chisel
meta.sharers := sharers
meta
}
def apply(implicit p: Parameters) = {
val meta = Wire(new ManagerMetadata)
//meta.state := UInt(width = 0) TODO: Fix 0-width wires in Chisel
meta.sharers := meta.co.dir.flush
meta
}
def onReset(implicit p: Parameters) = ManagerMetadata(p)
}
/** HierarchicalMetadata is used in a cache in a multi-level memory hierarchy
* that is a manager with respect to some inner caches and a client with
* respect to some outer cache.
*
* This class makes use of two different sets of TileLink parameters, which are
* applied by contextually mapping [[uncore.TLId]] to one of
* [[uncore.InnerTLId]] or [[uncore.OuterTLId]].
*/
class HierarchicalMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) {
val inner: ManagerMetadata = new ManagerMetadata()(p.alterPartial({case TLId => p(InnerTLId)}))
val outer: ClientMetadata = new ClientMetadata()(p.alterPartial({case TLId => p(OuterTLId)}))
def ===(rhs: HierarchicalMetadata): Bool =
this.inner === rhs.inner && this.outer === rhs.outer
def =/=(rhs: HierarchicalMetadata): Bool = !this.===(rhs)
}
/** Factories for HierarchicalMetadata, including on reset */
object HierarchicalMetadata {
def apply(inner: ManagerMetadata, outer: ClientMetadata)
(implicit p: Parameters): HierarchicalMetadata = {
val m = Wire(new HierarchicalMetadata)
m.inner := inner
m.outer := outer
m
}
def onReset(implicit p: Parameters): HierarchicalMetadata =
apply(ManagerMetadata.onReset, ClientMetadata.onReset)
}
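// A plain-Scala caricature of the ClientMetadata life cycle above
// (illustrative only; the states are invented, and the real transitions come
// from the CoherencePolicy via clientMetadataOnGrant/onProbe): a miss produces
// an Acquire, the Grant upgrades permissions, and an invalidating Probe later
// revokes them.
object ClientMetaSketch extends App {
  sealed trait St
  case object Invalid extends St
  case object Clean   extends St
  case object Dirty   extends St
  def onGrant(writeIntent: Boolean): St = if (writeIntent) Dirty else Clean
  def onProbe(s: St): St = Invalid   // assume an invalidating probe for the sketch
  var meta: St = Invalid
  meta = onGrant(writeIntent = true) // write miss refilled with write permission
  assert(meta == Dirty)
  meta = onProbe(meta)               // another client wants the block
  assert(meta == Invalid)
}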

View File

@ -0,0 +1,696 @@
// See LICENSE for license details.
package uncore.coherence
import Chisel._
import uncore.tilelink._
import uncore.constants._
import uncore.util._
/** The entire CoherencePolicy API consists of the following three traits:
* HasCustomTileLinkMessageTypes, used to define custom messages
* HasClientSideCoherencePolicy, for client coherence agents
* HasManagerSideCoherencePolicy, for manager coherence agents
*/
abstract class CoherencePolicy(val dir: DirectoryRepresentation)
extends HasCustomTileLinkMessageTypes
with HasClientSideCoherencePolicy
with HasManagerSideCoherencePolicy
/** This API defines the custom, coherence-policy-defined message types,
* as opposed to the built-in ones found in tilelink.scala.
* Policies must enumerate the custom messages to be sent over each
* channel, as well as which of them have associated data.
*/
trait HasCustomTileLinkMessageTypes {
val nAcquireTypes: Int
def acquireTypeWidth = log2Up(nAcquireTypes)
val nProbeTypes: Int
def probeTypeWidth = log2Up(nProbeTypes)
val nReleaseTypes: Int
def releaseTypeWidth = log2Up(nReleaseTypes)
val nGrantTypes: Int
def grantTypeWidth = log2Up(nGrantTypes)
val acquireTypesWithData = Nil // Only built-in Acquire types have data for now
def releaseTypesWithData: Seq[UInt]
def grantTypesWithData: Seq[UInt]
}
/** This API contains all functions required for client coherence agents.
* Policies must enumerate the number of client states and define their
* permissions with respect to memory operations. Policies must fill in functions
* to control which messages are sent and how metadata is updated in response
* to coherence events. These functions are generally called from within the
* ClientMetadata class in metadata.scala
*/
trait HasClientSideCoherencePolicy {
// Client coherence states and their permissions
val nClientStates: Int
def clientStateWidth = log2Ceil(nClientStates)
def clientStatesWithReadPermission: Seq[UInt]
def clientStatesWithWritePermission: Seq[UInt]
def clientStatesWithDirtyData: Seq[UInt]
// Transaction initiation logic
def isValid(meta: ClientMetadata): Bool
def isHit(cmd: UInt, meta: ClientMetadata): Bool = {
Mux(isWriteIntent(cmd),
meta.state isOneOf clientStatesWithWritePermission,
meta.state isOneOf clientStatesWithReadPermission)
}
//TODO: Assumes all states with write permissions also have read permissions
def requiresAcquireOnSecondaryMiss(
first_cmd: UInt,
second_cmd: UInt,
meta: ClientMetadata): Bool = {
isWriteIntent(second_cmd) && !isWriteIntent(first_cmd)
}
//TODO: Assumes all cache ctrl ops write back dirty data, and
// doesn't issue transaction when e.g. downgrading Exclusive to Shared:
def requiresReleaseOnCacheControl(cmd: UInt, meta: ClientMetadata): Bool =
meta.state isOneOf clientStatesWithDirtyData
// Determine which custom message type to use
def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt
def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt
def getReleaseType(p: HasProbeType, meta: ClientMetadata): UInt
// Mutate ClientMetadata based on messages or cmds
def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata): ClientMetadata
def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata): ClientMetadata
def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata): ClientMetadata
def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata): ClientMetadata
}
/** This API contains all functions required for manager coherence agents.
* Policies must enumerate the number of manager states. Policies must fill
* in functions to control which Probe and Grant messages are sent and how
* metadata should be updated in response to coherence events. These functions
* are generally called from within the ManagerMetadata class in metadata.scala
*/
trait HasManagerSideCoherencePolicy extends HasDirectoryRepresentation {
val nManagerStates: Int
def masterStateWidth = log2Ceil(nManagerStates)
// Transaction probing logic
def requiresProbes(acq: HasAcquireType, meta: ManagerMetadata): Bool
def requiresProbes(cmd: UInt, meta: ManagerMetadata): Bool
// Determine which custom message type to use in response
def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt
def getProbeType(acq: HasAcquireType, meta: ManagerMetadata): UInt
def getGrantType(acq: HasAcquireType, meta: ManagerMetadata): UInt
def getExclusiveGrantType(): UInt
// Mutate ManagerMetadata based on messages or cmds
def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata): ManagerMetadata
def managerMetadataOnGrant(outgoing: HasGrantType, dst: UInt, meta: ManagerMetadata) =
ManagerMetadata(sharers=Mux(outgoing.isBuiltInType(), // Assumes all built-ins are uncached
meta.sharers,
dir.push(meta.sharers, dst)))(meta.p)
//state = meta.state) TODO: Fix 0-width wires in Chisel
}
/** The following concrete implementations of CoherencePolicy each provide the
* functionality of one particular protocol.
*/
/** A simple protocol with only two Client states.
* Data is always assumed to be dirty.
* Only a single client may ever have a copy of a block at a time.
*/
class MICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
// Message types
val nAcquireTypes = 1
val nProbeTypes = 2
val nReleaseTypes = 4
val nGrantTypes = 1
val acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes)
val probeInvalidate :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
val releaseInvalidateData :: releaseCopyData :: releaseInvalidateAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
val grantExclusive :: Nil = Enum(UInt(), nGrantTypes)
def releaseTypesWithData = Seq(releaseInvalidateData, releaseCopyData)
def grantTypesWithData = Seq(grantExclusive)
// Client states and functions
val nClientStates = 2
val clientInvalid :: clientValid :: Nil = Enum(UInt(), nClientStates)
def clientStatesWithReadPermission = Seq(clientValid)
def clientStatesWithWritePermission = Seq(clientValid)
def clientStatesWithDirtyData = Seq(clientValid)
def isValid (meta: ClientMetadata): Bool = meta.state =/= clientInvalid
def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = acquireExclusive
def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
val dirty = meta.state isOneOf clientStatesWithDirtyData
MuxLookup(cmd, releaseCopyAck, Array(
M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
M_PRODUCE -> Mux(dirty, releaseCopyData, releaseCopyAck),
M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
}
def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
probeInvalidate -> getReleaseType(M_FLUSH, meta),
probeCopy -> getReleaseType(M_FLUSH, meta)))
def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = meta
def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
ClientMetadata(Mux(cmd === M_FLUSH, clientInvalid, meta.state))(meta.p)
def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
ClientMetadata(Mux(incoming.isBuiltInType(), clientInvalid, clientValid))(meta.p)
def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
ClientMetadata(Mux(incoming.p_type === probeInvalidate,
clientInvalid, meta.state))(meta.p)
// Manager states and functions:
val nManagerStates = 0 // We don't actually need any states for this protocol
def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) = !dir.none(meta.sharers)
def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)
def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
MuxLookup(cmd, probeCopy, Array(
M_FLUSH -> probeInvalidate))
def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
Mux(a.isBuiltInType(),
MuxLookup(a.a_type, probeCopy, Array(
Acquire.getBlockType -> probeCopy,
Acquire.putBlockType -> probeInvalidate,
Acquire.getType -> probeCopy,
Acquire.putType -> probeInvalidate,
Acquire.getPrefetchType -> probeCopy,
Acquire.putPrefetchType -> probeInvalidate,
Acquire.putAtomicType -> probeInvalidate)),
probeInvalidate)
def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type), grantExclusive)
def getExclusiveGrantType(): UInt = grantExclusive
def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
MuxCase(meta, Array(
incoming.is(releaseInvalidateData) -> popped,
incoming.is(releaseInvalidateAck) -> popped))
}
}
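/** Construction sketch (illustrative): a policy is parameterized by a
* DirectoryRepresentation sized for the clients it must track, e.g.
* {{{
* val policy = new MICoherence(new NullRepresentation(nClients))
* }}}
* where nClients is a hypothetical client count and NullRepresentation (from
* uncore.agents) is the track-nothing directory used with broadcast hubs.
*/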
/** A simple protocol with only three Client states.
* Data is marked as dirty when written.
* Only a single client may ever have a copy of a block at a time.
*/
class MEICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
// Message types
val nAcquireTypes = 2
val nProbeTypes = 3
val nReleaseTypes = 6
val nGrantTypes = 1
val acquireExclusiveClean :: acquireExclusiveDirty :: Nil = Enum(UInt(), nAcquireTypes)
val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
val grantExclusive :: Nil = Enum(UInt(), nGrantTypes)
def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData)
def grantTypesWithData = Seq(grantExclusive)
// Client states and functions
val nClientStates = 3
val clientInvalid :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates)
def clientStatesWithReadPermission = Seq(clientExclusiveClean, clientExclusiveDirty)
def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty)
def clientStatesWithDirtyData = Seq(clientExclusiveDirty)
def isValid (meta: ClientMetadata) = meta.state =/= clientInvalid
def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
Mux(isWriteIntent(cmd), acquireExclusiveDirty, acquireExclusiveClean)
def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
val dirty = meta.state isOneOf clientStatesWithDirtyData
MuxLookup(cmd, releaseCopyAck, Array(
M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
}
def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
probeInvalidate -> getReleaseType(M_FLUSH, meta),
probeDowngrade -> getReleaseType(M_FLUSH, meta),
probeCopy -> getReleaseType(M_FLUSH, meta)))
def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p)
def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
ClientMetadata(
MuxLookup(cmd, meta.state, Array(
M_FLUSH -> clientInvalid,
M_CLEAN -> Mux(meta.state === clientExclusiveDirty, clientExclusiveClean, meta.state))))(meta.p)
def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
ClientMetadata(
Mux(incoming.isBuiltInType(), clientInvalid,
Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean)))(meta.p)
def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
ClientMetadata(
MuxLookup(incoming.p_type, meta.state, Array(
probeInvalidate -> clientInvalid,
probeDowngrade -> clientInvalid,
probeCopy -> clientInvalid)))(meta.p)
// Manager states and functions:
val nManagerStates = 0 // We don't actually need any states for this protocol
def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) = !dir.none(meta.sharers)
def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)
def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
MuxLookup(cmd, probeCopy, Array(
M_FLUSH -> probeInvalidate,
M_PRODUCE -> probeDowngrade))
def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
Mux(a.isBuiltInType(),
MuxLookup(a.a_type, probeCopy, Array(
Acquire.getBlockType -> probeCopy,
Acquire.putBlockType -> probeInvalidate,
Acquire.getType -> probeCopy,
Acquire.putType -> probeInvalidate,
Acquire.getPrefetchType -> probeCopy,
Acquire.putPrefetchType -> probeInvalidate,
Acquire.putAtomicType -> probeInvalidate)),
probeInvalidate)
def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type), grantExclusive)
def getExclusiveGrantType(): UInt = grantExclusive
def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
MuxCase(meta, Array(
incoming.is(releaseInvalidateData) -> popped,
incoming.is(releaseInvalidateAck) -> popped))
}
}
/** A protocol with only three Client states.
* Data is always assumed to be dirty.
* Multiple clients may share read permissions on a block at the same time.
*/
class MSICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
// Message types
val nAcquireTypes = 2
val nProbeTypes = 3
val nReleaseTypes = 6
val nGrantTypes = 3
val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes)
val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes)
def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData)
def grantTypesWithData = Seq(grantShared, grantExclusive)
// Client states and functions
val nClientStates = 3
val clientInvalid :: clientShared :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates)
def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveDirty)
def clientStatesWithWritePermission = Seq(clientExclusiveDirty)
def clientStatesWithDirtyData = Seq(clientExclusiveDirty)
def isValid(meta: ClientMetadata): Bool = meta.state =/= clientInvalid
def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
Mux(isWriteIntent(cmd), acquireExclusive, acquireShared)
def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
val dirty = meta.state isOneOf clientStatesWithDirtyData
MuxLookup(cmd, releaseCopyAck, Array(
M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
}
def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
probeInvalidate -> getReleaseType(M_FLUSH, meta),
probeDowngrade -> getReleaseType(M_PRODUCE, meta),
probeCopy -> getReleaseType(M_PRODUCE, meta)))
def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p)
def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
ClientMetadata(
MuxLookup(cmd, meta.state, Array(
M_FLUSH -> clientInvalid,
M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission,
clientShared, meta.state))))(meta.p)
def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
ClientMetadata(
Mux(incoming.isBuiltInType(), clientInvalid,
MuxLookup(incoming.g_type, clientInvalid, Array(
grantShared -> clientShared,
grantExclusive -> clientExclusiveDirty,
grantExclusiveAck -> clientExclusiveDirty))))(meta.p)
def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
ClientMetadata(
MuxLookup(incoming.p_type, meta.state, Array(
probeInvalidate -> clientInvalid,
probeDowngrade -> clientShared,
probeCopy -> clientShared)))(meta.p)
// Manager states and functions:
val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing
// only a single sharer (also would need
// notification msg to track clean drops)
// Also could avoid probes on outer WBs.
def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) =
Mux(dir.none(meta.sharers), Bool(false),
Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive
Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared)))
def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)
def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
MuxLookup(cmd, probeCopy, Array(
M_FLUSH -> probeInvalidate,
M_PRODUCE -> probeDowngrade))
def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
Mux(a.isBuiltInType(),
MuxLookup(a.a_type, probeCopy, Array(
Acquire.getBlockType -> probeCopy,
Acquire.putBlockType -> probeInvalidate,
Acquire.getType -> probeCopy,
Acquire.putType -> probeInvalidate,
Acquire.getPrefetchType -> probeCopy,
Acquire.putPrefetchType -> probeInvalidate,
Acquire.putAtomicType -> probeInvalidate)),
MuxLookup(a.a_type, probeCopy, Array(
acquireShared -> probeDowngrade,
acquireExclusive -> probeInvalidate)))
def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type),
Mux(a.a_type === acquireShared,
Mux(!dir.none(meta.sharers), grantShared, grantExclusive),
grantExclusive))
def getExclusiveGrantType(): UInt = grantExclusive
def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
MuxCase(meta, Array(
incoming.is(releaseInvalidateData) -> popped,
incoming.is(releaseInvalidateAck) -> popped))
}
}
/** A protocol with four Client states.
* Data is marked as dirty when written.
* Multiple clients may share read permissions on a block at the same time.
*/
class MESICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
// Message types
val nAcquireTypes = 2
val nProbeTypes = 3
val nReleaseTypes = 6
val nGrantTypes = 3
val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes)
val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes)
def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData)
def grantTypesWithData = Seq(grantShared, grantExclusive)
// Client states and functions
val nClientStates = 4
val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates)
def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveClean, clientExclusiveDirty)
def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty)
def clientStatesWithDirtyData = Seq(clientExclusiveDirty)
def isValid(meta: ClientMetadata): Bool = meta.state =/= clientInvalid
def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
Mux(isWriteIntent(cmd), acquireExclusive, acquireShared)
def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
val dirty = meta.state isOneOf clientStatesWithDirtyData
MuxLookup(cmd, releaseCopyAck, Array(
M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
}
def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
probeInvalidate -> getReleaseType(M_FLUSH, meta),
probeDowngrade -> getReleaseType(M_PRODUCE, meta),
probeCopy -> getReleaseType(M_PRODUCE, meta)))
def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p)
def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
ClientMetadata(
MuxLookup(cmd, meta.state, Array(
M_FLUSH -> clientInvalid,
M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission,
clientShared, meta.state),
M_CLEAN -> Mux(meta.state === clientExclusiveDirty,
clientExclusiveClean, meta.state))))(meta.p)
def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
ClientMetadata(
Mux(incoming.isBuiltInType(), clientInvalid,
MuxLookup(incoming.g_type, clientInvalid, Array(
grantShared -> clientShared,
grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean),
grantExclusiveAck -> clientExclusiveDirty))))(meta.p)
def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
ClientMetadata(
MuxLookup(incoming.p_type, meta.state, Array(
probeInvalidate -> clientInvalid,
probeDowngrade -> clientShared,
probeCopy -> clientShared)))(meta.p)
// Manager states and functions:
val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing
// only a single sharer (also would need
// notification msg to track clean drops)
// Also could avoid probes on outer WBs.
def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) =
Mux(dir.none(meta.sharers), Bool(false),
Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive
Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared)))
def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)
def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
MuxLookup(cmd, probeCopy, Array(
M_FLUSH -> probeInvalidate,
M_PRODUCE -> probeDowngrade))
def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
Mux(a.isBuiltInType(),
MuxLookup(a.a_type, probeCopy, Array(
Acquire.getBlockType -> probeCopy,
Acquire.putBlockType -> probeInvalidate,
Acquire.getType -> probeCopy,
Acquire.putType -> probeInvalidate,
Acquire.getPrefetchType -> probeCopy,
Acquire.putPrefetchType -> probeInvalidate,
Acquire.putAtomicType -> probeInvalidate)),
MuxLookup(a.a_type, probeCopy, Array(
acquireShared -> probeDowngrade,
acquireExclusive -> probeInvalidate)))
def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type),
Mux(a.a_type === acquireShared,
Mux(!dir.none(meta.sharers), grantShared, grantExclusive),
grantExclusive))
def getExclusiveGrantType(): UInt = grantExclusive
def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
MuxCase(meta, Array(
incoming.is(releaseInvalidateData) -> popped,
incoming.is(releaseInvalidateAck) -> popped))
}
}
class MigratoryCoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
// Message types
val nAcquireTypes = 3
val nProbeTypes = 4
val nReleaseTypes = 10
val nGrantTypes = 4
val acquireShared :: acquireExclusive :: acquireInvalidateOthers :: Nil = Enum(UInt(), nAcquireTypes)
val probeInvalidate :: probeDowngrade :: probeCopy :: probeInvalidateOthers :: Nil = Enum(UInt(), nProbeTypes)
val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: releaseDowngradeDataMigratory :: releaseDowngradeAckHasCopy :: releaseInvalidateDataMigratory :: releaseInvalidateAckMigratory :: Nil = Enum(UInt(), nReleaseTypes)
val grantShared :: grantExclusive :: grantExclusiveAck :: grantReadMigratory :: Nil = Enum(UInt(), nGrantTypes)
def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData, releaseInvalidateDataMigratory, releaseDowngradeDataMigratory)
def grantTypesWithData = Seq(grantShared, grantExclusive, grantReadMigratory)
// Client states and functions
val nClientStates = 7
val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: clientSharedByTwo :: clientMigratoryClean :: clientMigratoryDirty :: Nil = Enum(UInt(), nClientStates)
def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveClean, clientExclusiveDirty, clientSharedByTwo, clientMigratoryClean, clientMigratoryDirty)
def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty, clientMigratoryClean, clientMigratoryDirty)
def clientStatesWithDirtyData = Seq(clientExclusiveDirty, clientMigratoryDirty)
def isValid (meta: ClientMetadata): Bool = meta.state =/= clientInvalid
def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
Mux(isWriteIntent(cmd),
Mux(meta.state === clientInvalid, acquireExclusive, acquireInvalidateOthers),
acquireShared)
def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
val dirty = meta.state isOneOf clientStatesWithDirtyData
MuxLookup(cmd, releaseCopyAck, Array(
M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
}
def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt = {
val dirty = meta.state isOneOf clientStatesWithDirtyData
val with_data = MuxLookup(incoming.p_type, releaseInvalidateData, Array(
probeInvalidate -> Mux(meta.state isOneOf (clientExclusiveDirty, clientMigratoryDirty),
releaseInvalidateDataMigratory, releaseInvalidateData),
probeDowngrade -> Mux(meta.state === clientMigratoryDirty,
releaseDowngradeDataMigratory, releaseDowngradeData),
probeCopy -> releaseCopyData))
val without_data = MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
probeInvalidate -> Mux(clientExclusiveClean === meta.state,
releaseInvalidateAckMigratory, releaseInvalidateAck),
probeInvalidateOthers -> Mux(clientSharedByTwo === meta.state,
releaseInvalidateAckMigratory, releaseInvalidateAck),
probeDowngrade -> Mux(meta.state =/= clientInvalid,
releaseDowngradeAckHasCopy, releaseDowngradeAck),
probeCopy -> Mux(meta.state =/= clientInvalid,
releaseDowngradeAckHasCopy, releaseDowngradeAck)))
Mux(dirty, with_data, without_data)
}
def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
ClientMetadata(
Mux(isWrite(cmd), MuxLookup(meta.state, clientExclusiveDirty, Array(
clientExclusiveClean -> clientExclusiveDirty,
clientMigratoryClean -> clientMigratoryDirty)),
meta.state))(meta.p)
def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
ClientMetadata(
MuxLookup(cmd, meta.state, Array(
M_FLUSH -> clientInvalid,
M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission,
clientShared, meta.state),
M_CLEAN -> MuxLookup(meta.state, meta.state, Array(
clientExclusiveDirty -> clientExclusiveClean,
clientMigratoryDirty -> clientMigratoryClean)))))(meta.p)
def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
ClientMetadata(
Mux(incoming.isBuiltInType(), clientInvalid,
MuxLookup(incoming.g_type, clientInvalid, Array(
grantShared -> clientShared,
grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean),
grantExclusiveAck -> clientExclusiveDirty,
grantReadMigratory -> Mux(isWrite(cmd),
clientMigratoryDirty, clientMigratoryClean)))))(meta.p)
def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) = {
val downgradeState = MuxLookup(meta.state, clientShared, Array(
clientExclusiveClean -> clientSharedByTwo,
clientExclusiveDirty -> clientSharedByTwo,
clientSharedByTwo -> clientShared,
clientMigratoryClean -> clientSharedByTwo,
clientMigratoryDirty -> clientInvalid))
ClientMetadata(
MuxLookup(incoming.p_type, meta.state, Array(
probeInvalidate -> clientInvalid,
probeInvalidateOthers -> clientInvalid,
probeDowngrade -> downgradeState,
probeCopy -> downgradeState)))(meta.p)
}
// Manager states and functions:
val nManagerStates = 0 // TODO: we could add some states to reduce the number of message types
def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) =
Mux(dir.none(meta.sharers), Bool(false),
Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive
Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared)))
def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)
def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
MuxLookup(cmd, probeCopy, Array(
M_FLUSH -> probeInvalidate,
M_PRODUCE -> probeDowngrade))
def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
Mux(a.isBuiltInType(),
MuxLookup(a.a_type, probeCopy, Array(
Acquire.getBlockType -> probeCopy,
Acquire.putBlockType -> probeInvalidate,
Acquire.getType -> probeCopy,
Acquire.putType -> probeInvalidate,
Acquire.getPrefetchType -> probeCopy,
Acquire.putPrefetchType -> probeInvalidate,
Acquire.putAtomicType -> probeInvalidate)),
MuxLookup(a.a_type, probeCopy, Array(
acquireShared -> probeDowngrade,
acquireExclusive -> probeInvalidate,
acquireInvalidateOthers -> probeInvalidateOthers)))
def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type),
MuxLookup(a.a_type, grantShared, Array(
acquireShared -> Mux(!dir.none(meta.sharers), grantShared, grantExclusive),
acquireExclusive -> grantExclusive,
acquireInvalidateOthers -> grantExclusiveAck))) //TODO: add this to MESI for broadcast?
def getExclusiveGrantType(): UInt = grantExclusive
def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
MuxCase(meta, Array(
incoming.is(releaseInvalidateData) -> popped,
incoming.is(releaseInvalidateAck) -> popped,
incoming.is(releaseInvalidateDataMigratory) -> popped,
incoming.is(releaseInvalidateAckMigratory) -> popped))
}
}
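// Recap (illustrative): the concrete policies above scale in sharing support:
// MICoherence (2 client states, single owner, data always dirty),
// MEICoherence (3 states, clean/dirty tracking), MSICoherence (3 states,
// shared readers), MESICoherence (4 states, shared readers plus a clean
// exclusive state), and MigratoryCoherence (7 states, passing exclusive
// ownership along for migratory data).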

View File

@ -0,0 +1,424 @@
package uncore.converters
import Chisel._
import junctions._
import uncore.tilelink._
import uncore.util._
import uncore.constants._
import cde.{Parameters, Field}
import HastiConstants._
/* We need to translate TileLink requests into operations we can actually execute on AHB.
* The general plan of attack is:
* get => one AHB=>TL read
* put => [multiple AHB write fragments=>nil], one AHB write=>TL
* getBlock => AHB burst reads =>TL
* putBlock => AHB burst writes=>TL
* getPrefetch => noop=>TL
* putPrefetch => noop=>TL
* putAtomic => one AHB=>TL read, one idle, one AHB atom_write=>nil, one idle
*
* This requires that we support a pipeline of optional AHB requests with optional TL responses
*/
class AHBRequestIO(implicit p: Parameters) extends HastiMasterIO
with HasGrantType
with HasClientTransactionId
with HasTileLinkBeatId {
val executeAHB = Bool()
val respondTL = Bool()
val latchAtom = Bool()
val firstBurst = Bool()
val finalBurst = Bool()
val cmd = Bits(width = M_SZ) // atomic op
}
// AHB stage1: translate TileLink Acquires into AHBRequests
class AHBTileLinkIn(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module
with HasHastiParameters
with HasTileLinkParameters
with HasAddrMapParameters {
val io = new Bundle {
val acquire = new DecoupledIO(new Acquire).flip // NOTE: acquire must be either a Queue or a Pipe
val request = new DecoupledIO(new AHBRequestIO)
}
// Match the AHB burst with a TileLink {Put,Get}Block
val burstSize = tlDataBeats match {
case 1 => HBURST_SINGLE
// case 2 not supported by AHB
case 4 => HBURST_WRAP4
case 8 => HBURST_WRAP8
case 16 => HBURST_WRAP16
case _ => throw new java.lang.AssertionError("TileLink beats unsupported by AHB")
}
// Bursts start at 0 and wrap around back to 0
val finalBurst = UInt(tlDataBeats-1, width = log2Up(tlDataBeats))
val firstBurst = UInt(0, width = log2Up(tlDataBeats))
val next_wmask = Wire(UInt(width = tlDataBytes)) // calculated below
// State variables for processing more complicated TileLink Acquires
val s_atom_r :: s_atom_idle1 :: s_atom_w :: s_atom_idle2 :: Nil = Enum(UInt(), 4)
val atom_state = Reg(init = s_atom_r) // never changes if !supportAtomics
val done_wmask = Reg(init = UInt(0, width = tlDataBytes))
val burst = Reg(init = firstBurst)
// Grab some view of the TileLink acquire
val acq_wmask = io.acquire.bits.wmask()
val isReadBurst = io.acquire.bits.is(Acquire.getBlockType)
val isWriteBurst = io.acquire.bits.is(Acquire.putBlockType)
val isBurst = isWriteBurst || isReadBurst
val isAtomic = io.acquire.bits.is(Acquire.putAtomicType) && Bool(supportAtomics)
val isPut = io.acquire.bits.is(Acquire.putType)
// Final states?
val last_wmask = next_wmask === acq_wmask
val last_atom = atom_state === s_atom_idle2
val last_burst = burst === finalBurst
// Block the incoming request until we've fully consumed it
// NOTE: the outgoing grant.valid may happen while acquire.ready is still false;
// for this reason it is essential to have a Queue or a Pipe in front of acquire
io.acquire.ready := io.request.ready && MuxLookup(io.acquire.bits.a_type, Bool(true), Array(
Acquire.getType -> Bool(true),
Acquire.getBlockType -> last_burst, // hold it until the last burst beat has issued
Acquire.putType -> last_wmask, // only accept the put if we can fully consume its wmask
Acquire.putBlockType -> Bool(true),
Acquire.putAtomicType -> last_atom, // atomic operation stages complete
Acquire.getPrefetchType -> Bool(true),
Acquire.putPrefetchType -> Bool(true)))
// Advance the fragment state
when (io.request.ready && io.acquire.valid && isPut) {
when (last_wmask) { // if this was the last fragment, restart FSM
done_wmask := UInt(0)
} .otherwise {
done_wmask := next_wmask
}
}
// Advance the burst state
// We assume here that TileLink gives us all putBlock beats with nothing between them
when (io.request.ready && io.acquire.valid && isBurst) {
when (last_burst) {
burst := UInt(0)
} .otherwise {
burst := burst + UInt(1)
}
}
// Advance the atomic state machine
when (io.request.ready && io.acquire.valid && isAtomic) {
switch (atom_state) {
is (s_atom_r) { atom_state := s_atom_idle1 }
is (s_atom_idle1) { atom_state := s_atom_w } // idle1 => AMOALU runs on a different cycle than the AHB slave read
is (s_atom_w) { atom_state := s_atom_idle2 }
is (s_atom_idle2) { atom_state := s_atom_r } // idle2 state is required by AHB after hmastlock is lowered
}
}
// Returns (range=0, range=-1, aligned_wmask, size)
def mask_helper(in_0 : Bool, range : UInt): (Bool, Bool, UInt, UInt) = {
val len = range.getWidth
if (len == 1) {
(range === UInt(0), range === UInt(1), in_0.asUInt() & range, UInt(0))
} else {
val mid = len / 2
val lo = range(mid-1, 0)
val hi = range(len-1, mid)
val (lo_0, lo_1, lo_m, lo_s) = mask_helper(in_0, lo)
val (hi_0, hi_1, hi_m, hi_s) = mask_helper(in_0 && lo_0, hi)
val out_0 = lo_0 && hi_0
val out_1 = lo_1 && hi_1
val out_m = Cat(hi_m, lo_m) | Fill(len, (in_0 && out_1).asUInt())
val out_s = Mux(out_1, UInt(log2Up(len)), Mux(lo_0, hi_s, lo_s))
(out_0, out_1, out_m, out_s)
}
}
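// Worked example (illustrative): mask_helper peels off the lowest naturally
// aligned power-of-two run of the pending write mask, so a sparse Put is
// replayed as a sequence of legal AHB transfers. With 4-byte beats:
// pending_wmask = b1100 -> exec_wmask = b1100, put_size = 1 (one halfword at byte 2)
// pending_wmask = b0110 -> exec_wmask = b0010, put_size = 0 (byte at offset 1),
// then exec_wmask = b0100 on the next fragment (byte at offset 2)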
val pending_wmask = acq_wmask & ~done_wmask
val put_addr = PriorityEncoder(pending_wmask)
val (wmask_0, _, exec_wmask, put_size) = mask_helper(Bool(true), pending_wmask)
next_wmask := done_wmask | exec_wmask
// Calculate the address, with consideration to put fragments and bursts
val addr_block = io.acquire.bits.addr_block
val addr_beatin= io.acquire.bits.addr_beat
val addr_burst = Mux(isReadBurst, addr_beatin + burst, addr_beatin)
val addr_byte = Mux(isPut, put_addr, io.acquire.bits.addr_byte())
val addr_beat = Mux(isWriteBurst, UInt(0), addr_burst)
val ahbAddr = Cat(addr_block, addr_burst, addr_byte)
val ahbSize = Mux(isPut, put_size, Mux(isBurst, UInt(log2Ceil(tlDataBytes)), io.acquire.bits.op_size()))
val ahbBurst = MuxLookup(io.acquire.bits.a_type, HBURST_SINGLE, Array(
Acquire.getType -> HBURST_SINGLE,
Acquire.getBlockType -> burstSize,
Acquire.putType -> HBURST_SINGLE,
Acquire.putBlockType -> burstSize,
Acquire.putAtomicType -> HBURST_SINGLE,
Acquire.getPrefetchType -> HBURST_SINGLE,
Acquire.putPrefetchType -> HBURST_SINGLE))
val ahbWrite = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
Acquire.getType -> Bool(false),
Acquire.getBlockType -> Bool(false),
Acquire.putType -> Bool(true),
Acquire.putBlockType -> Bool(true),
Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
s_atom_r -> Bool(false),
s_atom_idle1 -> Bool(false), // don't care
s_atom_w -> Bool(true),
s_atom_idle2 -> Bool(true))), // don't care
Acquire.getPrefetchType -> Bool(false), // don't care
Acquire.putPrefetchType -> Bool(true))) // don't care
val ahbExecute = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
Acquire.getType -> Bool(true),
Acquire.getBlockType -> Bool(true),
Acquire.putType -> !wmask_0, // handle the case of a Put with no bytes!
Acquire.putBlockType -> Bool(true),
Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
s_atom_r -> Bool(true),
s_atom_idle1 -> Bool(false),
s_atom_w -> Bool(true),
s_atom_idle2 -> Bool(false))),
Acquire.getPrefetchType -> Bool(false),
Acquire.putPrefetchType -> Bool(false)))
val respondTL = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
Acquire.getType -> Bool(true),
Acquire.getBlockType -> Bool(true),
Acquire.putType -> last_wmask,
Acquire.putBlockType -> last_burst,
Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
s_atom_r -> Bool(true), // they want the old data
s_atom_idle1 -> Bool(false),
s_atom_w -> Bool(false),
s_atom_idle2 -> Bool(false))),
Acquire.getPrefetchType -> Bool(true),
Acquire.putPrefetchType -> Bool(true)))
io.request.valid := io.acquire.valid
io.request.bits.htrans := HTRANS_IDLE // unused/ignored
io.request.bits.haddr := ahbAddr
io.request.bits.hmastlock := isAtomic && atom_state =/= s_atom_idle2
io.request.bits.hwrite := ahbWrite
io.request.bits.hburst := ahbBurst
io.request.bits.hsize := ahbSize
io.request.bits.hprot := HPROT_DATA | HPROT_PRIVILEGED
io.request.bits.hwdata := io.acquire.bits.data
io.request.bits.executeAHB := ahbExecute
io.request.bits.respondTL := respondTL
io.request.bits.latchAtom := isAtomic && atom_state === s_atom_r
io.request.bits.firstBurst := burst === firstBurst
io.request.bits.finalBurst := burst === finalBurst || !isBurst
io.request.bits.cmd := io.acquire.bits.op_code()
io.request.bits.is_builtin_type := Bool(true)
io.request.bits.g_type := io.acquire.bits.getBuiltInGrantType()
io.request.bits.client_xact_id := io.acquire.bits.client_xact_id
io.request.bits.addr_beat := addr_beat
val debugBurst = Reg(UInt())
when (io.request.valid) {
debugBurst := addr_burst - burst
}
// We only support built-in TileLink requests
assert(!io.acquire.valid || io.acquire.bits.is_builtin_type, "AHB bridge only supports builtin TileLink types")
// Ensure alignment of address to size
assert(!io.acquire.valid || (ahbAddr & ((UInt(1) << ahbSize) - UInt(1))) === UInt(0), "TileLink operation misaligned")
// If this is a putBlock, make sure it moves properly
assert(!io.acquire.valid || !isBurst || burst === firstBurst || debugBurst === addr_burst - burst, "TileLink putBlock beats not sequential")
// We better not get an incomplete TileLink acquire
assert(!io.acquire.valid || isBurst || burst === firstBurst, "TileLink never completed a putBlock")
// If we disabled atomic support, we better not see a request
assert(!io.acquire.bits.is(Acquire.putAtomicType) || Bool(supportAtomics))
}
// AHB stage2: execute AHBRequests
class AHBBusMaster(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module
with HasHastiParameters
with HasTileLinkParameters
with HasAddrMapParameters {
val io = new Bundle {
val request = new DecoupledIO(new AHBRequestIO).flip
val grant = new DecoupledIO(new Grant)
val ahb = new HastiMasterIO()
}
// All AHB outputs are registered (they might be IOs)
val midBurst = Reg(init = Bool(false))
val htrans = Reg(init = HTRANS_IDLE)
val haddr = Reg(UInt())
val hmastlock = Reg(init = Bool(false))
val hwrite = Reg(Bool())
val hburst = Reg(UInt())
val hsize = Reg(init = UInt(0, width = SZ_HSIZE))
val hprot = Reg(UInt())
val hwdata0 = Reg(Bits())
val hwdata1 = Reg(Bits())
val hrdata = Reg(Bits())
io.ahb.htrans := htrans
io.ahb.haddr := haddr
io.ahb.hmastlock := hmastlock
io.ahb.hwrite := hwrite
io.ahb.hburst := hburst
io.ahb.hsize := hsize
io.ahb.hprot := hprot
io.ahb.hwdata := hwdata1 // one cycle after the address phase
// TileLink response data needed in data phase
val respondTL0 = Reg(init = Bool(false))
val respondTL1 = Reg(init = Bool(false))
val latchAtom0 = Reg(init = Bool(false))
val latchAtom1 = Reg(init = Bool(false))
val executeAHB0 = Reg(init = Bool(false))
val executeAHB1 = Reg(init = Bool(false))
val bubble = Reg(init = Bool(true)) // nothing useful in address phase
val cmd = Reg(Bits())
val g_type0 = Reg(UInt())
val g_type1 = Reg(UInt())
val client_xact_id0 = Reg(Bits())
val client_xact_id1 = Reg(Bits())
val addr_beat0 = Reg(UInt())
val addr_beat1 = Reg(UInt())
val grant1 = Reg(new Grant)
// It is allowed to progress from Idle/Busy during a wait state
val addrReady = io.ahb.hready || bubble || (!executeAHB1 && !executeAHB0)
val dataReady = io.ahb.hready || !executeAHB1
// Only accept a new AHBRequest if we have enough buffer space in the pad
// to accommodate a persistent drop in TileLink's grant.ready
io.request.ready := addrReady && io.grant.ready
// htrans must be updated even if no request is valid
when (addrReady) {
when (io.request.fire() && io.request.bits.executeAHB) {
midBurst := !io.request.bits.finalBurst
when (io.request.bits.firstBurst) {
htrans := HTRANS_NONSEQ
} .otherwise {
htrans := HTRANS_SEQ
}
} .otherwise {
when (midBurst) {
htrans := HTRANS_BUSY
} .otherwise {
htrans := HTRANS_IDLE
}
}
}
// Address phase; clear respondTL when we have nothing to do
when (addrReady) {
when (io.request.fire()) {
respondTL0 := io.request.bits.respondTL
latchAtom0 := io.request.bits.latchAtom
executeAHB0 := io.request.bits.executeAHB
bubble := Bool(false)
} .otherwise {
respondTL0 := Bool(false)
latchAtom0 := Bool(false)
executeAHB0 := Bool(false)
bubble := Bool(true) // an atom-injected Idle is not a bubble!
}
}
// Transfer bulk address phase
when (io.request.fire()) {
haddr := io.request.bits.haddr
hmastlock := io.request.bits.hmastlock
hwrite := io.request.bits.hwrite
hburst := io.request.bits.hburst
hsize := io.request.bits.hsize
hprot := io.request.bits.hprot
hwdata0 := io.request.bits.hwdata
cmd := io.request.bits.cmd
g_type0 := io.request.bits.g_type
client_xact_id0 := io.request.bits.client_xact_id
addr_beat0 := io.request.bits.addr_beat
}
// Execute Atomic ops; unused and optimized away if !supportAtomics
val amo_p = p.alterPartial({
case CacheBlockOffsetBits => hastiAddrBits
})
val alu = Module(new AMOALU(hastiDataBits, rhsIsAligned = true)(amo_p))
alu.io.addr := haddr
alu.io.cmd := cmd
alu.io.typ := hsize
alu.io.rhs := hwdata0
alu.io.lhs := hrdata
// Transfer bulk data phase
when (dataReady) {
when (addrReady) {
respondTL1 := respondTL0
latchAtom1 := latchAtom0
executeAHB1 := executeAHB0
} .otherwise {
respondTL1 := Bool(false)
latchAtom1 := Bool(false)
executeAHB1 := Bool(false)
}
hwdata1 := Mux(Bool(supportAtomics), alu.io.out, hwdata0)
g_type1 := g_type0
client_xact_id1 := client_xact_id0
addr_beat1 := addr_beat0
}
// Latch the read result for an atomic operation
when (dataReady && latchAtom1) {
hrdata := io.ahb.hrdata
}
// Only issue TL grant when the slave has provided data
io.grant.valid := dataReady && respondTL1
io.grant.bits := Grant(
is_builtin_type = Bool(true),
g_type = g_type1,
client_xact_id = client_xact_id1,
manager_xact_id = UInt(0),
addr_beat = addr_beat1,
data = io.ahb.hrdata)
// We cannot support errors from AHB to TileLink
assert(!io.ahb.hresp, "AHB hresp error detected and cannot be reported via TileLink")
}
class AHBBridge(supportAtomics: Boolean = true)(implicit val p: Parameters) extends Module
with HasHastiParameters
with HasTileLinkParameters
with HasAddrMapParameters {
val io = new Bundle {
val tl = new ClientUncachedTileLinkIO().flip
val ahb = new HastiMasterIO()
}
// Hasti and TileLink widths must agree at this point in the topology
require (tlDataBits == hastiDataBits)
require (p(PAddrBits) == hastiAddrBits)
// AHB does not permit bursts to cross a 1KB boundary
require (tlDataBits * tlDataBeats <= 1024*8)
// tlDataBytes must be a power of 2
require (1 << log2Ceil(tlDataBytes) == tlDataBytes)
// Create the sub-blocks
val fsm = Module(new AHBTileLinkIn(supportAtomics))
val bus = Module(new AHBBusMaster(supportAtomics))
val pad = Module(new Queue(new Grant, 4))
fsm.io.acquire <> Queue(io.tl.acquire, 2) // Pipe is also acceptable
bus.io.request <> fsm.io.request
io.ahb <> bus.io.ahb
io.tl.grant <> pad.io.deq
// The pad is needed to absorb AHB progress while !grant.ready
// We are only 'ready' if the pad has at least 3 cycles of space
bus.io.grant.ready := pad.io.count <= UInt(1)
pad.io.enq.bits := bus.io.grant.bits
pad.io.enq.valid := bus.io.grant.valid
}
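/** Instantiation sketch (illustrative): bridging an uncached TileLink client
* port to an AHB master. The io.mem and io.ahb_out port names of the
* enclosing module are hypothetical; AHBBridge and its io come from this file.
* {{{
* val bridge = Module(new AHBBridge(supportAtomics = true))
* bridge.io.tl <> io.mem
* io.ahb_out <> bridge.io.ahb
* }}}
*/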

View File

@ -0,0 +1,383 @@
package uncore.converters
import Chisel._
import junctions._
import uncore.tilelink._
import uncore.constants._
import cde.Parameters
import scala.math.min
class IdMapper(val inIdBits: Int, val outIdBits: Int,
val forceMapping: Boolean = false)
(implicit val p: Parameters) extends Module {
val io = new Bundle {
val req = new Bundle {
val valid = Bool(INPUT)
val ready = Bool(OUTPUT)
val in_id = UInt(INPUT, inIdBits)
val out_id = UInt(OUTPUT, outIdBits)
}
val resp = new Bundle {
val valid = Bool(INPUT)
val matches = Bool(OUTPUT)
val out_id = UInt(INPUT, outIdBits)
val in_id = UInt(OUTPUT, inIdBits)
}
}
val maxInXacts = 1 << inIdBits
if (inIdBits <= outIdBits && !forceMapping) {
io.req.ready := Bool(true)
io.req.out_id := io.req.in_id
io.resp.matches := Bool(true)
io.resp.in_id := io.resp.out_id
} else {
val nInXacts = 1 << inIdBits
// No point in allowing more out xacts than in xacts
val nOutXacts = min(1 << outIdBits, nInXacts)
val out_id_free = Reg(init = Vec.fill(nOutXacts){Bool(true)})
val in_id_free = Reg(init = Vec.fill(nInXacts){Bool(true)})
val next_out_id = PriorityEncoder(out_id_free)
val id_mapping = Reg(Vec(nOutXacts, UInt(0, inIdBits)))
val req_fire = io.req.valid && io.req.ready
when (req_fire) {
out_id_free(io.req.out_id) := Bool(false)
in_id_free(io.req.in_id) := Bool(false)
id_mapping(io.req.out_id) := io.req.in_id
}
when (io.resp.valid) {
out_id_free(io.resp.out_id) := Bool(true)
in_id_free(io.resp.in_id) := Bool(true)
}
io.req.ready := out_id_free.reduce(_ || _) && in_id_free(io.req.in_id)
io.req.out_id := next_out_id
io.resp.in_id := id_mapping(io.resp.out_id)
io.resp.matches := !out_id_free(io.resp.out_id)
}
}
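/** Usage sketch (illustrative): shrinking a 4-bit TileLink id space onto a
* 2-bit bus id space. A request claims a free outbound id and the matching
* response releases it, recovering the original id. The driver signals below
* (acq_fire, acq_xact_id, bus_resp_fire, bus_resp_id) are hypothetical.
* {{{
* val mapper = Module(new IdMapper(inIdBits = 4, outIdBits = 2))
* mapper.io.req.valid := acq_fire
* mapper.io.req.in_id := acq_xact_id
* val bus_id = mapper.io.req.out_id   // valid only when io.req.ready
* mapper.io.resp.valid := bus_resp_fire
* mapper.io.resp.out_id := bus_resp_id
* val orig_id = mapper.io.resp.in_id  // matches when io.resp.matches
* }}}
*/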
class NastiIOTileLinkIOConverterInfo(implicit p: Parameters) extends TLBundle()(p) {
val addr_beat = UInt(width = tlBeatAddrBits)
val subblock = Bool()
}
class NastiIOTileLinkIOConverter(implicit p: Parameters) extends TLModule()(p)
with HasNastiParameters {
val io = new Bundle {
val tl = new ClientUncachedTileLinkIO().flip
val nasti = new NastiIO
}
val dataBits = tlDataBits*tlDataBeats
require(tlDataBits == nastiXDataBits, "Data sizes between LLC and MC don't agree") // TODO: remove this restriction
require(tlDataBeats < (1 << nastiXLenBits), "Can't have that many beats")
val has_data = io.tl.acquire.bits.hasData()
val is_subblock = io.tl.acquire.bits.isSubBlockType()
val is_multibeat = io.tl.acquire.bits.hasMultibeatData()
val (tl_cnt_out, tl_wrap_out) = Counter(
io.tl.acquire.fire() && is_multibeat, tlDataBeats)
val get_valid = io.tl.acquire.valid && !has_data
val put_valid = io.tl.acquire.valid && has_data
// Reorder queue saves extra information needed to send correct
// grant back to TL client
val roqIdBits = min(tlClientXactIdBits, nastiXIdBits)
val roq = Module(new ReorderQueue(
new NastiIOTileLinkIOConverterInfo, roqIdBits))
val get_id_mapper = Module(new IdMapper(tlClientXactIdBits, nastiXIdBits))
val put_id_mapper = Module(new IdMapper(tlClientXactIdBits, nastiXIdBits))
val get_id_ready = get_id_mapper.io.req.ready
val put_id_mask = is_subblock || io.tl.acquire.bits.addr_beat === UInt(0)
val put_id_ready = put_id_mapper.io.req.ready || !put_id_mask
// For Get/GetBlock, make sure Reorder queue can accept new entry
val get_helper = DecoupledHelper(
get_valid,
roq.io.enq.ready,
io.nasti.ar.ready,
get_id_ready)
val w_inflight = Reg(init = Bool(false))
val w_id_reg = Reg(init = UInt(0, nastiXIdBits))
val w_id = Mux(w_inflight, w_id_reg, put_id_mapper.io.req.out_id)
// For Put/PutBlock, make sure aw and w channel are both ready before
// we send the first beat
val aw_ready = w_inflight || io.nasti.aw.ready
val put_helper = DecoupledHelper(
put_valid,
aw_ready,
io.nasti.w.ready,
put_id_ready)
val (nasti_cnt_out, nasti_wrap_out) = Counter(
io.nasti.r.fire() && !roq.io.deq.data.subblock, tlDataBeats)
roq.io.enq.valid := get_helper.fire(roq.io.enq.ready)
roq.io.enq.bits.tag := io.nasti.ar.bits.id
roq.io.enq.bits.data.addr_beat := io.tl.acquire.bits.addr_beat
roq.io.enq.bits.data.subblock := is_subblock
roq.io.deq.valid := io.nasti.r.fire() && (nasti_wrap_out || roq.io.deq.data.subblock)
roq.io.deq.tag := io.nasti.r.bits.id
get_id_mapper.io.req.valid := get_helper.fire(get_id_ready)
get_id_mapper.io.req.in_id := io.tl.acquire.bits.client_xact_id
get_id_mapper.io.resp.valid := io.nasti.r.fire() && io.nasti.r.bits.last
get_id_mapper.io.resp.out_id := io.nasti.r.bits.id
put_id_mapper.io.req.valid := put_helper.fire(put_id_ready, put_id_mask)
put_id_mapper.io.req.in_id := io.tl.acquire.bits.client_xact_id
put_id_mapper.io.resp.valid := io.nasti.b.fire()
put_id_mapper.io.resp.out_id := io.nasti.b.bits.id
// Decompose outgoing TL Acquires into Nasti address and data channels
io.nasti.ar.valid := get_helper.fire(io.nasti.ar.ready)
io.nasti.ar.bits := NastiReadAddressChannel(
id = get_id_mapper.io.req.out_id,
addr = io.tl.acquire.bits.full_addr(),
size = Mux(is_subblock,
io.tl.acquire.bits.op_size(),
UInt(log2Ceil(tlDataBytes))),
len = Mux(is_subblock, UInt(0), UInt(tlDataBeats - 1)))
def mask_helper(all_inside_0: Seq[Bool], defsize: Int): (Seq[Bool], UInt, UInt) = {
val len = all_inside_0.size
if (len == 1) {
(Seq(Bool(true)), UInt(0), UInt(defsize))
} else {
val sub_inside_0 = Seq.tabulate (len/2) { i => all_inside_0(2*i) && all_inside_0(2*i+1) }
val (sub_outside_0, sub_offset, sub_size) = mask_helper(sub_inside_0, defsize+1)
val all_outside_0 = Seq.tabulate (len) { i => sub_outside_0(i/2) && all_inside_0(i^1) }
val odd_outside_0 = Seq.tabulate (len/2) { i => all_outside_0(2*i+1) }
val odd_outside = odd_outside_0.reduce (_ || _)
val all_outside = all_outside_0.reduce (_ || _)
val offset = Cat(sub_offset, odd_outside)
val size = Mux(all_outside, UInt(defsize), sub_size)
(all_outside_0, offset, size)
}
}
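// Worked example (illustrative): for an 8-byte beat with wmask = 0x0c (bytes
// 2 and 3 written), all_inside_0 marks bytes 0, 1, and 4..7 as untouched and
// the recursion yields put_offset = 2, put_size = 1: a single 2-byte NASTI
// write at byte offset 2.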
val all_inside_0 = (~io.tl.acquire.bits.wmask()).toBools
val (_, put_offset, put_size) = mask_helper(all_inside_0, 0)
io.nasti.aw.valid := put_helper.fire(aw_ready, !w_inflight)
io.nasti.aw.bits := NastiWriteAddressChannel(
id = put_id_mapper.io.req.out_id,
addr = io.tl.acquire.bits.full_addr() |
Mux(is_multibeat, UInt(0), put_offset),
size = Mux(is_multibeat, UInt(log2Ceil(tlDataBytes)), put_size),
len = Mux(is_multibeat, UInt(tlDataBeats - 1), UInt(0)))
io.nasti.w.valid := put_helper.fire(io.nasti.w.ready)
io.nasti.w.bits := NastiWriteDataChannel(
id = w_id,
data = io.tl.acquire.bits.data,
strb = Some(io.tl.acquire.bits.wmask()),
last = Mux(w_inflight,
tl_cnt_out === UInt(tlDataBeats - 1), !is_multibeat))
io.tl.acquire.ready := Mux(has_data,
put_helper.fire(put_valid),
get_helper.fire(get_valid))
when (!w_inflight && io.tl.acquire.fire() && is_multibeat) {
w_inflight := Bool(true)
w_id_reg := w_id
}
when (w_inflight) {
when (tl_wrap_out) { w_inflight := Bool(false) }
}
// Aggregate incoming NASTI responses into TL Grants
val (tl_cnt_in, tl_wrap_in) = Counter(
io.tl.grant.fire() && io.tl.grant.bits.hasMultibeatData(), tlDataBeats)
val gnt_arb = Module(new LockingArbiter(new GrantToDst, 2,
tlDataBeats, Some((gnt: GrantToDst) => gnt.hasMultibeatData())))
io.tl.grant <> gnt_arb.io.out
gnt_arb.io.in(0).valid := io.nasti.r.valid
io.nasti.r.ready := gnt_arb.io.in(0).ready
gnt_arb.io.in(0).bits := Grant(
is_builtin_type = Bool(true),
g_type = Mux(roq.io.deq.data.subblock,
Grant.getDataBeatType, Grant.getDataBlockType),
client_xact_id = get_id_mapper.io.resp.in_id,
manager_xact_id = UInt(0),
addr_beat = Mux(roq.io.deq.data.subblock, roq.io.deq.data.addr_beat, tl_cnt_in),
data = io.nasti.r.bits.data)
assert(!roq.io.deq.valid || roq.io.deq.matches,
"TL -> NASTI converter ReorderQueue: NASTI tag error")
assert(!gnt_arb.io.in(0).valid || get_id_mapper.io.resp.matches,
"TL -> NASTI ID Mapper: NASTI tag error")
gnt_arb.io.in(1).valid := io.nasti.b.valid
io.nasti.b.ready := gnt_arb.io.in(1).ready
gnt_arb.io.in(1).bits := Grant(
is_builtin_type = Bool(true),
g_type = Grant.putAckType,
client_xact_id = put_id_mapper.io.resp.in_id,
manager_xact_id = UInt(0),
addr_beat = UInt(0),
data = Bits(0))
assert(!gnt_arb.io.in(1).valid || put_id_mapper.io.resp.matches, "NASTI tag error")
assert(!io.nasti.r.valid || io.nasti.r.bits.resp === UInt(0), "NASTI read error")
assert(!io.nasti.b.valid || io.nasti.b.bits.resp === UInt(0), "NASTI write error")
}
class TileLinkIONastiIOConverter(implicit p: Parameters) extends TLModule()(p)
with HasNastiParameters {
val io = new Bundle {
val nasti = (new NastiIO).flip
val tl = new ClientUncachedTileLinkIO
}
val (s_idle :: s_put :: Nil) = Enum(Bits(), 2)
val state = Reg(init = s_idle)
private val blockOffset = tlByteAddrBits + tlBeatAddrBits
val aw_req = Reg(new NastiWriteAddressChannel)
val w_tl_id = Reg(io.tl.acquire.bits.client_xact_id)
def is_singlebeat(chan: NastiAddressChannel): Bool =
chan.len === UInt(0)
def is_multibeat(chan: NastiAddressChannel): Bool =
chan.len === UInt(tlDataBeats - 1) && chan.size === UInt(log2Up(tlDataBytes))
def nasti_addr_block(chan: NastiAddressChannel): UInt =
chan.addr(nastiXAddrBits - 1, blockOffset)
def nasti_addr_beat(chan: NastiAddressChannel): UInt =
chan.addr(blockOffset - 1, tlByteAddrBits)
def nasti_addr_byte(chan: NastiAddressChannel): UInt =
chan.addr(tlByteAddrBits - 1, 0)
def size_mask(size: UInt): UInt =
(UInt(1) << (UInt(1) << size)) - UInt(1)
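// Worked example (illustrative): size_mask(UInt(2)) = (1 << 4) - 1 = 0x0f,
// i.e. a 4-byte access covers the low four strobe bits before the byte-offset
// shift applied in nasti_wmask below.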
def nasti_wmask(aw: NastiWriteAddressChannel, w: NastiWriteDataChannel): UInt = {
val addr_byte = nasti_addr_byte(aw)
w.strb & (size_mask(aw.size) << addr_byte)
}
def tl_last(gnt: GrantMetadata): Bool =
!gnt.hasMultibeatData() || gnt.addr_beat === UInt(tlDataBeats - 1)
def tl_b_grant(gnt: GrantMetadata): Bool =
gnt.g_type === Grant.putAckType
assert(!io.nasti.ar.valid ||
is_singlebeat(io.nasti.ar.bits) || is_multibeat(io.nasti.ar.bits),
"NASTI read transaction cannot convert to TileLInk")
assert(!io.nasti.aw.valid ||
is_singlebeat(io.nasti.aw.bits) || is_multibeat(io.nasti.aw.bits),
"NASTI write transaction cannot convert to TileLInk")
val put_count = Reg(init = UInt(0, tlBeatAddrBits))
val get_id_mapper = Module(new IdMapper(nastiXIdBits, tlClientXactIdBits, true))
val put_id_mapper = Module(new IdMapper(nastiXIdBits, tlClientXactIdBits, true))
when (io.nasti.aw.fire()) {
aw_req := io.nasti.aw.bits
w_tl_id := put_id_mapper.io.req.out_id
state := s_put
}
when (io.nasti.w.fire()) {
put_count := put_count + UInt(1)
when (io.nasti.w.bits.last) {
put_count := UInt(0)
state := s_idle
}
}
val get_acquire = Mux(is_multibeat(io.nasti.ar.bits),
GetBlock(
client_xact_id = get_id_mapper.io.req.out_id,
addr_block = nasti_addr_block(io.nasti.ar.bits)),
Get(
client_xact_id = get_id_mapper.io.req.out_id,
addr_block = nasti_addr_block(io.nasti.ar.bits),
addr_beat = nasti_addr_beat(io.nasti.ar.bits),
addr_byte = nasti_addr_byte(io.nasti.ar.bits),
operand_size = io.nasti.ar.bits.size,
alloc = Bool(false)))
val put_acquire = Mux(is_multibeat(aw_req),
PutBlock(
client_xact_id = w_tl_id,
addr_block = nasti_addr_block(aw_req),
addr_beat = put_count,
data = io.nasti.w.bits.data,
wmask = Some(io.nasti.w.bits.strb)),
Put(
client_xact_id = w_tl_id,
addr_block = nasti_addr_block(aw_req),
addr_beat = nasti_addr_beat(aw_req),
data = io.nasti.w.bits.data,
wmask = Some(nasti_wmask(aw_req, io.nasti.w.bits))))
val get_helper = DecoupledHelper(
io.nasti.ar.valid,
get_id_mapper.io.req.ready,
io.tl.acquire.ready)
get_id_mapper.io.req.valid := get_helper.fire(
get_id_mapper.io.req.ready, state === s_idle)
get_id_mapper.io.req.in_id := io.nasti.ar.bits.id
get_id_mapper.io.resp.out_id := io.tl.grant.bits.client_xact_id
get_id_mapper.io.resp.valid := io.nasti.r.fire() && io.nasti.r.bits.last
val aw_ok = (state === s_idle && !io.nasti.ar.valid)
put_id_mapper.io.req.valid := aw_ok && io.nasti.aw.valid
put_id_mapper.io.req.in_id := io.nasti.aw.bits.id
put_id_mapper.io.resp.out_id := io.tl.grant.bits.client_xact_id
put_id_mapper.io.resp.valid := io.nasti.b.fire()
io.tl.acquire.bits := Mux(state === s_put, put_acquire, get_acquire)
io.tl.acquire.valid := get_helper.fire(io.tl.acquire.ready, state === s_idle) ||
(state === s_put && io.nasti.w.valid)
io.nasti.ar.ready := get_helper.fire(io.nasti.ar.valid, state === s_idle)
io.nasti.aw.ready := aw_ok && put_id_mapper.io.req.ready
io.nasti.w.ready := (state === s_put && io.tl.acquire.ready)
val nXacts = tlMaxClientXacts * tlMaxClientsPerPort
io.nasti.b.valid := io.tl.grant.valid && tl_b_grant(io.tl.grant.bits)
io.nasti.b.bits := NastiWriteResponseChannel(
id = put_id_mapper.io.resp.in_id)
assert(!io.nasti.b.valid || put_id_mapper.io.resp.matches,
"Put ID does not match")
io.nasti.r.valid := io.tl.grant.valid && !tl_b_grant(io.tl.grant.bits)
io.nasti.r.bits := NastiReadDataChannel(
id = get_id_mapper.io.resp.in_id,
data = io.tl.grant.bits.data,
last = tl_last(io.tl.grant.bits))
assert(!io.nasti.r.valid || get_id_mapper.io.resp.matches,
"Get ID does not match")
io.tl.grant.ready := Mux(tl_b_grant(io.tl.grant.bits),
io.nasti.b.ready, io.nasti.r.ready)
}

View File

@ -0,0 +1,32 @@
// See LICENSE for details
package uncore.converters
import Chisel._
import junctions._
import uncore.tilelink._
import cde.Parameters
/** Convert TileLink protocol to Smi protocol */
class SmiIOTileLinkIOConverter(val dataWidth: Int, val addrWidth: Int)
(implicit p: Parameters) extends Module {
val io = new Bundle {
val tl = (new ClientUncachedTileLinkIO).flip
val smi = new SmiIO(dataWidth, addrWidth)
}
def decoupledNastiConnect(outer: NastiIO, inner: NastiIO) {
outer.ar <> Queue(inner.ar)
outer.aw <> Queue(inner.aw)
outer.w <> Queue(inner.w)
inner.r <> Queue(outer.r)
inner.b <> Queue(outer.b)
}
val tl2nasti = Module(new NastiIOTileLinkIOConverter())
val nasti2smi = Module(new SmiIONastiIOConverter(dataWidth, addrWidth))
tl2nasti.io.tl <> io.tl
decoupledNastiConnect(nasti2smi.io.nasti, tl2nasti.io.nasti)
io.smi <> nasti2smi.io.smi
}
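// A minimal usage sketch (illustrative; `tlPort` and `dev` are hypothetical):
//
//   val conv = Module(new SmiIOTileLinkIOConverter(dataWidth = 32, addrWidth = 12))
//   conv.io.tl <> tlPort                 // ClientUncachedTileLinkIO master
//   dev.io.smi <> conv.io.smi            // 32-bit SMI register slave
//
// The Queues inserted by decoupledNastiConnect cut the combinational paths
// between the two back-to-back converters.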

View File

@ -0,0 +1,681 @@
package uncore.converters
import Chisel._
import junctions._
import uncore.tilelink._
import uncore.util._
import uncore.constants._
import cde.Parameters
/** Utilities for safely wrapping a *UncachedTileLink by pinning probe.ready and release.valid low */
object TileLinkIOWrapper {
def apply(tl: ClientUncachedTileLinkIO)(implicit p: Parameters): ClientTileLinkIO = {
val conv = Module(new ClientTileLinkIOWrapper)
conv.io.in <> tl
conv.io.out
}
def apply(tl: UncachedTileLinkIO)(implicit p: Parameters): TileLinkIO = {
val conv = Module(new TileLinkIOWrapper)
conv.io.in <> tl
conv.io.out
}
def apply(tl: ClientTileLinkIO): ClientTileLinkIO = tl
def apply(tl: TileLinkIO): TileLinkIO = tl
}
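// A minimal usage sketch (illustrative; `uncached` is a hypothetical port):
//
//   val cached: ClientTileLinkIO = TileLinkIOWrapper(uncached)
//
// The overloads taking already-cached ports are identity functions, so callers
// can wrap unconditionally; probe.ready is tied high and release.valid low,
// which is safe only for clients that never hold cached copies.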
class TileLinkIOWrapper(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val in = new UncachedTileLinkIO().flip
val out = new TileLinkIO
}
io.out.acquire <> io.in.acquire
io.in.grant <> io.out.grant
io.out.finish <> io.in.finish
io.out.probe.ready := Bool(true)
io.out.release.valid := Bool(false)
}
class ClientTileLinkIOWrapper(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val in = new ClientUncachedTileLinkIO().flip
val out = new ClientTileLinkIO
}
io.out.acquire <> io.in.acquire
io.in.grant <> io.out.grant
io.out.probe.ready := Bool(true)
io.out.release.valid := Bool(false)
}
class ClientTileLinkIOUnwrapper(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val in = new ClientTileLinkIO().flip
val out = new ClientUncachedTileLinkIO
}
val acqArb = Module(new LockingRRArbiter(new Acquire, 2, tlDataBeats,
Some((acq: Acquire) => acq.hasMultibeatData())))
val acqRoq = Module(new ReorderQueue(Bool(), tlClientXactIdBits))
val relRoq = Module(new ReorderQueue(Bool(), tlClientXactIdBits))
val iacq = io.in.acquire.bits
val irel = io.in.release.bits
val ognt = io.out.grant.bits
val acq_roq_enq = iacq.first()
val rel_roq_enq = irel.first()
val acq_roq_ready = !acq_roq_enq || acqRoq.io.enq.ready
val rel_roq_ready = !rel_roq_enq || relRoq.io.enq.ready
val acq_helper = DecoupledHelper(
io.in.acquire.valid,
acq_roq_ready,
acqArb.io.in(0).ready)
val rel_helper = DecoupledHelper(
io.in.release.valid,
rel_roq_ready,
acqArb.io.in(1).ready)
acqRoq.io.enq.valid := acq_helper.fire(acq_roq_ready, acq_roq_enq)
acqRoq.io.enq.bits.data := iacq.isBuiltInType()
acqRoq.io.enq.bits.tag := iacq.client_xact_id
acqArb.io.in(0).valid := acq_helper.fire(acqArb.io.in(0).ready)
acqArb.io.in(0).bits := Acquire(
is_builtin_type = Bool(true),
a_type = Mux(iacq.isBuiltInType(),
iacq.a_type, Acquire.getBlockType),
client_xact_id = iacq.client_xact_id,
addr_block = iacq.addr_block,
addr_beat = iacq.addr_beat,
data = iacq.data,
union = iacq.union)
io.in.acquire.ready := acq_helper.fire(io.in.acquire.valid)
relRoq.io.enq.valid := rel_helper.fire(rel_roq_ready, rel_roq_enq)
relRoq.io.enq.bits.data := irel.isVoluntary()
relRoq.io.enq.bits.tag := irel.client_xact_id
acqArb.io.in(1).valid := rel_helper.fire(acqArb.io.in(1).ready)
acqArb.io.in(1).bits := PutBlock(
client_xact_id = irel.client_xact_id,
addr_block = irel.addr_block,
addr_beat = irel.addr_beat,
data = irel.data)
io.in.release.ready := rel_helper.fire(io.in.release.valid)
io.out.acquire <> acqArb.io.out
val grant_deq_roq = io.out.grant.fire() && ognt.last()
acqRoq.io.deq.valid := acqRoq.io.deq.matches && grant_deq_roq
acqRoq.io.deq.tag := ognt.client_xact_id
relRoq.io.deq.valid := !acqRoq.io.deq.matches && grant_deq_roq
relRoq.io.deq.tag := ognt.client_xact_id
assert(!grant_deq_roq || acqRoq.io.deq.matches || relRoq.io.deq.matches,
"TileLink Unwrapper: client_xact_id mismatch")
val gnt_builtin = acqRoq.io.deq.data
val gnt_voluntary = relRoq.io.deq.data
val acq_grant = Grant(
is_builtin_type = gnt_builtin,
g_type = Mux(gnt_builtin, ognt.g_type, tlCoh.getExclusiveGrantType),
client_xact_id = ognt.client_xact_id,
manager_xact_id = ognt.manager_xact_id,
addr_beat = ognt.addr_beat,
data = ognt.data)
assert(!io.in.release.valid || io.in.release.bits.isVoluntary(), "Unwrapper can only process voluntary releases.")
val rel_grant = Grant(
is_builtin_type = Bool(true),
g_type = Grant.voluntaryAckType, // We should only ever be working with voluntary releases
client_xact_id = ognt.client_xact_id,
manager_xact_id = ognt.manager_xact_id,
addr_beat = ognt.addr_beat,
data = ognt.data)
io.in.grant.valid := io.out.grant.valid
io.in.grant.bits := Mux(acqRoq.io.deq.matches, acq_grant, rel_grant)
io.out.grant.ready := io.in.grant.ready
io.in.probe.valid := Bool(false)
}
object TileLinkWidthAdapter {
def apply(in: ClientUncachedTileLinkIO, outerId: String)(implicit p: Parameters) = {
val outerDataBits = p(TLKey(outerId)).dataBitsPerBeat
if (outerDataBits > in.tlDataBits) {
val widener = Module(new TileLinkIOWidener(in.p(TLId), outerId))
widener.io.in <> in
widener.io.out
} else if (outerDataBits < in.tlDataBits) {
val narrower = Module(new TileLinkIONarrower(in.p(TLId), outerId))
narrower.io.in <> in
narrower.io.out
} else { in }
}
def apply(out: ClientUncachedTileLinkIO, in: ClientUncachedTileLinkIO)(implicit p: Parameters): Unit = {
require(out.tlDataBits * out.tlDataBeats == in.tlDataBits * in.tlDataBeats)
out <> apply(in, out.p(TLId))
}
}
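// Illustrative elaboration sketch (hypothetical TLKey entries): if
// p(TLKey("L1toL2")).dataBitsPerBeat is 64 and p(TLKey("L2toMC")).dataBitsPerBeat
// is 128, then
//
//   val wide = TileLinkWidthAdapter(l1Port, "L2toMC")
//
// instantiates a TileLinkIOWidener; swapped widths would elaborate a
// TileLinkIONarrower, and equal widths return the input port unchanged.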
class TileLinkIOWidener(innerTLId: String, outerTLId: String)
(implicit p: Parameters) extends TLModule()(p) {
val paddrBits = p(PAddrBits)
val innerParams = p(TLKey(innerTLId))
val outerParams = p(TLKey(outerTLId))
val innerDataBeats = innerParams.dataBeats
val innerDataBits = innerParams.dataBitsPerBeat
val innerWriteMaskBits = innerParams.writeMaskBits
val innerByteAddrBits = log2Up(innerWriteMaskBits)
val innerMaxXacts = innerParams.maxClientXacts * innerParams.maxClientsPerPort
val innerXactIdBits = log2Up(innerMaxXacts)
val outerDataBeats = outerParams.dataBeats
val outerDataBits = outerParams.dataBitsPerBeat
val outerWriteMaskBits = outerParams.writeMaskBits
val outerByteAddrBits = log2Up(outerWriteMaskBits)
val outerBeatAddrBits = log2Up(outerDataBeats)
val outerBlockOffset = outerBeatAddrBits + outerByteAddrBits
val outerMaxClients = outerParams.maxClientsPerPort
val outerClientIdBits = log2Up(outerParams.maxClientXacts * outerMaxClients)
val outerManagerIdBits = log2Up(outerParams.maxManagerXacts)
val outerBlockAddrBits = paddrBits - outerBlockOffset
require(outerDataBeats <= innerDataBeats)
require(outerDataBits >= innerDataBits)
require(outerDataBits % innerDataBits == 0)
require(outerDataBits * outerDataBeats == innerDataBits * innerDataBeats)
val factor = innerDataBeats / outerDataBeats
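// Worked example (hypothetical parameters): innerDataBits = 64, innerDataBeats = 8,
// outerDataBits = 128, outerDataBeats = 4 satisfies all four requires and gives
// factor = 2, i.e. each outer beat is assembled from two consecutive inner beats.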
val io = new Bundle {
val in = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => innerTLId})).flip
val out = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => outerTLId}))
}
val iacq = io.in.acquire.bits
val oacq = io.out.acquire.bits
val ognt = io.out.grant.bits
val ignt = io.in.grant.bits
val shrink = iacq.a_type === Acquire.putBlockType
val stretch = ognt.g_type === Grant.getDataBlockType
val smallget = iacq.a_type === Acquire.getType
val smallput = iacq.a_type === Acquire.putType
val smallgnt = ognt.g_type === Grant.getDataBeatType
val sending_put = Reg(init = Bool(false))
val collecting = Reg(init = Bool(false))
val put_block = Reg(UInt(width = outerBlockAddrBits))
val put_id = Reg(UInt(width = outerClientIdBits))
val put_data = Reg(Vec(factor, UInt(width = innerDataBits)))
val put_wmask = Reg(Vec(factor, UInt(width = innerWriteMaskBits)))
val put_allocate = Reg(Bool())
val (put_beat, put_done) = Counter(io.out.acquire.fire() && oacq.hasMultibeatData(), outerDataBeats)
val (recv_idx, recv_done) = Counter(io.in.acquire.fire() && iacq.hasMultibeatData(), factor)
val in_addr = iacq.full_addr()
val out_addr_block = in_addr(paddrBits - 1, outerBlockOffset)
val out_addr_beat = in_addr(outerBlockOffset - 1, outerByteAddrBits)
val out_addr_byte = in_addr(outerByteAddrBits - 1, 0)
val switch_addr = in_addr(outerByteAddrBits - 1, innerByteAddrBits)
val smallget_switch = Reg(Vec(innerMaxXacts, switch_addr))
def align_data(addr: UInt, data: UInt): UInt =
data << Cat(addr, UInt(0, log2Up(innerDataBits)))
def align_wmask(addr: UInt, wmask: UInt): UInt =
wmask << Cat(addr, UInt(0, log2Up(innerWriteMaskBits)))
val outerConfig = p.alterPartial({ case TLId => outerTLId })
val get_acquire = Get(
client_xact_id = iacq.client_xact_id,
addr_block = out_addr_block,
addr_beat = out_addr_beat,
addr_byte = out_addr_byte,
operand_size = iacq.op_size(),
alloc = iacq.allocate())(outerConfig)
val get_block_acquire = GetBlock(
client_xact_id = iacq.client_xact_id,
addr_block = out_addr_block,
alloc = iacq.allocate())(outerConfig)
val put_acquire = Put(
client_xact_id = iacq.client_xact_id,
addr_block = out_addr_block,
addr_beat = out_addr_beat,
data = align_data(switch_addr, iacq.data),
wmask = Some(align_wmask(switch_addr, iacq.wmask())),
alloc = iacq.allocate())(outerConfig)
val put_block_acquire = PutBlock(
client_xact_id = put_id,
addr_block = put_block,
addr_beat = put_beat,
data = put_data.asUInt,
wmask = Some(put_wmask.asUInt))(outerConfig)
io.out.acquire.valid := sending_put || (!shrink && io.in.acquire.valid)
io.out.acquire.bits := MuxCase(get_block_acquire, Seq(
sending_put -> put_block_acquire,
smallget -> get_acquire,
smallput -> put_acquire))
io.in.acquire.ready := !sending_put && (shrink || io.out.acquire.ready)
when (io.in.acquire.fire() && shrink) {
when (!collecting) {
put_block := out_addr_block
put_id := iacq.client_xact_id
put_allocate := iacq.allocate()
collecting := Bool(true)
}
put_data(recv_idx) := iacq.data
put_wmask(recv_idx) := iacq.wmask()
}
when (io.in.acquire.fire() && smallget) {
smallget_switch(iacq.client_xact_id) := switch_addr
}
when (recv_done) { sending_put := Bool(true) }
when (sending_put && io.out.acquire.ready) { sending_put := Bool(false) }
when (put_done) { collecting := Bool(false) }
val returning_data = Reg(init = Bool(false))
val (send_idx, send_done) = Counter(
io.in.grant.ready && returning_data, factor)
val gnt_beat = Reg(UInt(width = outerBeatAddrBits))
val gnt_client_id = Reg(UInt(width = outerClientIdBits))
val gnt_manager_id = Reg(UInt(width = outerManagerIdBits))
val gnt_data = Reg(UInt(width = outerDataBits))
when (io.out.grant.fire() && stretch) {
gnt_data := ognt.data
gnt_client_id := ognt.client_xact_id
gnt_manager_id := ognt.manager_xact_id
gnt_beat := ognt.addr_beat
returning_data := Bool(true)
}
when (send_done) { returning_data := Bool(false) }
def select_data(data: UInt, sel: UInt): UInt =
data >> (sel << log2Up(innerDataBits))
val gnt_switch = smallget_switch(ognt.client_xact_id)
val innerConfig = p.alterPartial({ case TLId => innerTLId })
val get_block_grant = Grant(
is_builtin_type = Bool(true),
g_type = Grant.getDataBlockType,
client_xact_id = gnt_client_id,
manager_xact_id = gnt_manager_id,
addr_beat = Cat(gnt_beat, send_idx),
data = select_data(gnt_data, send_idx))(innerConfig)
val get_grant = Grant(
is_builtin_type = Bool(true),
g_type = Grant.getDataBeatType,
client_xact_id = ognt.client_xact_id,
manager_xact_id = ognt.manager_xact_id,
addr_beat = Cat(ognt.addr_beat, gnt_switch),
data = select_data(ognt.data, gnt_switch))(innerConfig)
val default_grant = Grant(
is_builtin_type = Bool(true),
g_type = ognt.g_type,
client_xact_id = ognt.client_xact_id,
manager_xact_id = ognt.manager_xact_id,
addr_beat = ognt.addr_beat,
data = ognt.data)(innerConfig)
io.in.grant.valid := returning_data || (!stretch && io.out.grant.valid)
io.in.grant.bits := MuxCase(default_grant, Seq(
returning_data -> get_block_grant,
smallgnt -> get_grant))
io.out.grant.ready := !returning_data && (stretch || io.in.grant.ready)
}
class TileLinkIONarrower(innerTLId: String, outerTLId: String)
(implicit p: Parameters) extends TLModule()(p) {
val innerParams = p(TLKey(innerTLId))
val outerParams = p(TLKey(outerTLId))
val innerDataBeats = innerParams.dataBeats
val innerDataBits = innerParams.dataBitsPerBeat
val innerWriteMaskBits = innerParams.writeMaskBits
val innerByteAddrBits = log2Up(innerWriteMaskBits)
val outerDataBeats = outerParams.dataBeats
val outerDataBits = outerParams.dataBitsPerBeat
val outerWriteMaskBits = outerParams.writeMaskBits
val outerByteAddrBits = log2Up(outerWriteMaskBits)
val outerBeatAddrBits = log2Up(outerDataBeats)
val outerBlockOffset = outerBeatAddrBits + outerByteAddrBits
val outerMaxClients = outerParams.maxClientsPerPort
val outerIdBits = log2Up(outerParams.maxClientXacts * outerMaxClients)
require(outerDataBeats > innerDataBeats)
require(outerDataBeats % innerDataBeats == 0)
require(outerDataBits < innerDataBits)
require(outerDataBits * outerDataBeats == innerDataBits * innerDataBeats)
val factor = outerDataBeats / innerDataBeats
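// Worked example (hypothetical parameters): innerDataBits = 128, innerDataBeats = 4,
// outerDataBits = 64, outerDataBeats = 8 gives factor = 2: each inner PutBlock beat
// is buffered and shifted out as two outer beats of outerDataBits each.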
val io = new Bundle {
val in = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => innerTLId})).flip
val out = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => outerTLId}))
}
val iacq = io.in.acquire.bits
val ognt = io.out.grant.bits
val stretch = iacq.a_type === Acquire.putBlockType
val shrink = iacq.a_type === Acquire.getBlockType
val smallput = iacq.a_type === Acquire.putType
val smallget = iacq.a_type === Acquire.getType
val acq_data_buffer = Reg(UInt(width = innerDataBits))
val acq_wmask_buffer = Reg(UInt(width = innerWriteMaskBits))
val acq_client_id = Reg(iacq.client_xact_id)
val acq_addr_block = Reg(iacq.addr_block)
val acq_addr_beat = Reg(iacq.addr_beat)
val oacq_ctr = Counter(factor)
val outer_beat_addr = iacq.full_addr()(outerBlockOffset - 1, outerByteAddrBits)
val outer_byte_addr = iacq.full_addr()(outerByteAddrBits - 1, 0)
val mask_chunks = Vec.tabulate(factor) { i =>
val lsb = i * outerWriteMaskBits
val msb = (i + 1) * outerWriteMaskBits - 1
iacq.wmask()(msb, lsb)
}
val data_chunks = Vec.tabulate(factor) { i =>
val lsb = i * outerDataBits
val msb = (i + 1) * outerDataBits - 1
iacq.data(msb, lsb)
}
val beat_sel = Cat(mask_chunks.map(mask => mask.orR).reverse)
val smallput_data = Mux1H(beat_sel, data_chunks)
val smallput_wmask = Mux1H(beat_sel, mask_chunks)
val smallput_beat = Cat(iacq.addr_beat, PriorityEncoder(beat_sel))
assert(!io.in.acquire.valid || !smallput || PopCount(beat_sel) <= UInt(1),
"Can't perform Put wider than outer width")
val read_size_ok = iacq.op_size() <= UInt(log2Ceil(outerDataBits / 8))
assert(!io.in.acquire.valid || !smallget || read_size_ok,
"Can't perform Get wider than outer width")
val outerConfig = p.alterPartial({ case TLId => outerTLId })
val innerConfig = p.alterPartial({ case TLId => innerTLId })
val get_block_acquire = GetBlock(
client_xact_id = iacq.client_xact_id,
addr_block = iacq.addr_block,
alloc = iacq.allocate())(outerConfig)
val put_block_acquire = PutBlock(
client_xact_id = acq_client_id,
addr_block = acq_addr_block,
addr_beat = if (factor > 1)
Cat(acq_addr_beat, oacq_ctr.value)
else acq_addr_beat,
data = acq_data_buffer(outerDataBits - 1, 0),
wmask = Some(acq_wmask_buffer(outerWriteMaskBits - 1, 0)))(outerConfig)
val get_acquire = Get(
client_xact_id = iacq.client_xact_id,
addr_block = iacq.addr_block,
addr_beat = outer_beat_addr,
addr_byte = outer_byte_addr,
operand_size = iacq.op_size(),
alloc = iacq.allocate())(outerConfig)
val put_acquire = Put(
client_xact_id = iacq.client_xact_id,
addr_block = iacq.addr_block,
addr_beat = smallput_beat,
data = smallput_data,
wmask = Some(smallput_wmask))(outerConfig)
val sending_put = Reg(init = Bool(false))
val pass_valid = io.in.acquire.valid && !stretch
io.out.acquire.bits := MuxCase(Wire(io.out.acquire.bits, init=iacq), Seq(
(sending_put, put_block_acquire),
(shrink, get_block_acquire),
(smallput, put_acquire),
(smallget, get_acquire)))
io.out.acquire.valid := sending_put || pass_valid
io.in.acquire.ready := !sending_put && (stretch || io.out.acquire.ready)
when (io.in.acquire.fire() && stretch) {
acq_data_buffer := iacq.data
acq_wmask_buffer := iacq.wmask()
acq_client_id := iacq.client_xact_id
acq_addr_block := iacq.addr_block
acq_addr_beat := iacq.addr_beat
sending_put := Bool(true)
}
when (sending_put && io.out.acquire.ready) {
acq_data_buffer := acq_data_buffer >> outerDataBits
acq_wmask_buffer := acq_wmask_buffer >> outerWriteMaskBits
when (oacq_ctr.inc()) { sending_put := Bool(false) }
}
val ognt_block = ognt.hasMultibeatData()
val gnt_data_buffer = Reg(Vec(factor, UInt(width = outerDataBits)))
val gnt_client_id = Reg(ognt.client_xact_id)
val gnt_manager_id = Reg(ognt.manager_xact_id)
val ignt_ctr = Counter(innerDataBeats)
val ognt_ctr = Counter(factor)
val sending_get = Reg(init = Bool(false))
val get_block_grant = Grant(
is_builtin_type = Bool(true),
g_type = Grant.getDataBlockType,
client_xact_id = gnt_client_id,
manager_xact_id = gnt_manager_id,
addr_beat = ignt_ctr.value,
data = gnt_data_buffer.asUInt)(innerConfig)
val smallget_grant = ognt.g_type === Grant.getDataBeatType
val get_grant = Grant(
is_builtin_type = Bool(true),
g_type = Grant.getDataBeatType,
client_xact_id = ognt.client_xact_id,
manager_xact_id = ognt.manager_xact_id,
addr_beat = ognt.addr_beat >> UInt(log2Up(factor)),
data = Fill(factor, ognt.data))(innerConfig)
io.in.grant.valid := sending_get || (io.out.grant.valid && !ognt_block)
io.out.grant.ready := !sending_get && (ognt_block || io.in.grant.ready)
io.in.grant.bits := MuxCase(Wire(io.in.grant.bits, init=ognt), Seq(
sending_get -> get_block_grant,
smallget_grant -> get_grant))
when (io.out.grant.valid && ognt_block && !sending_get) {
gnt_data_buffer(ognt_ctr.value) := ognt.data
when (ognt_ctr.inc()) {
gnt_client_id := ognt.client_xact_id
gnt_manager_id := ognt.manager_xact_id
sending_get := Bool(true)
}
}
when (io.in.grant.ready && sending_get) {
ignt_ctr.inc()
sending_get := Bool(false)
}
}
class TileLinkFragmenterSource(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val in = Decoupled(new Acquire).flip
val out = Decoupled(new Acquire)
val que = Decoupled(UInt(width = tlBeatAddrBits))
}
// Pipeline stage with acquire data; needed to ensure in.bits stays fixed when !in.ready
val acq_valid = RegInit(Bool(false))
val acq_bits = Reg(new Acquire)
// The last beat of the generated acquire to send
val acq_last_beat = Reg(UInt(width = tlBeatAddrBits))
val acq_last = acq_bits.addr_beat === acq_last_beat
// 'in' has the first beat?
val in_multi_put = io.in.bits.isBuiltInType(Acquire.putBlockType)
val in_multi_get = io.in.bits.isBuiltInType(Acquire.getBlockType)
val in_first_beat = !in_multi_put || io.in.bits.addr_beat === UInt(0)
// Move stuff from acq to out whenever out is ready
io.out.valid := acq_valid
// When can acq accept a request?
val acq_ready = !acq_valid || (acq_last && io.out.ready)
// Move the first beat from in to acq only when both acq and que are ready
io.in.ready := (!in_first_beat || io.que.ready) && acq_ready
io.que.valid := (in_first_beat && io.in.valid) && acq_ready
// in.fire moves data from in to acq and (optionally) que
// out.fire moves data from acq to out
// Desired flow control results:
assert (!io.que.fire() || io.in.fire()) // 1. que.fire => in.fire
assert (!(io.in.fire() && in_first_beat) || io.que.fire()) // 2. in.fire && in_first_beat => que.fire
assert (!io.out.fire() || acq_valid) // 3. out.fire => acq_valid
assert (!io.in.fire() || (!acq_valid || (io.out.fire() && acq_last))) // 4. in.fire => !acq_valid || (out.fire && acq_last)
// Proofs:
// 1. que.fire => que.ready && in.valid && acq_ready => in.ready && in.valid
// 2. in.fire && in_first_beat => in.valid && acq_ready && [(!in_first_beat || que.ready) && in_first_beat] =>
// in.valid && acq_ready && que.ready && in_first_beat => que.valid && que.ready
// 3. out.fire => out.valid => acq_valid
// 4. in.fire => acq_ready => !acq_valid || (acq_last && out.ready) =>
// !acq_valid || (acq_valid && acq_last && out.ready) => !acq_valid || (acq_last && out.fire)
val multi_size = SInt(-1, width = tlBeatAddrBits).asUInt // TL2: use in.bits.size()/beatBits-1
val in_sizeMinus1 = Mux(in_multi_get || in_multi_put, multi_size, UInt(0))
val in_insertSizeMinus1 = Mux(in_multi_get, multi_size, UInt(0))
when (io.in.fire()) {
// Theorem 4 makes this safe; we overwrite garbage, or replace the final acq
acq_valid := Bool(true)
acq_bits := io.in.bits
acq_last_beat := io.in.bits.addr_beat + in_insertSizeMinus1
// Replace this with size truncation in TL2:
acq_bits.a_type := Mux(in_multi_put, Acquire.putType, Mux(in_multi_get, Acquire.getType, io.in.bits.a_type))
} .elsewhen (io.out.fire()) {
acq_valid := !acq_last // false => !in.valid || (!que.ready && in_first_beat)
acq_bits.addr_beat := acq_bits.addr_beat + UInt(1)
// acq_last && out.fire => acq_last && out.ready && acq_valid => acq_ready
// Suppose in.valid, then !in.fire => !in.ready => !(!in_first_beat || que.ready) => !que.ready && in_first_beat
}
// Safe by theorem 3
io.out.bits := acq_bits
// Safe by theorem 1
io.que.bits := in_sizeMinus1
}
class TileLinkFragmenterSink(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val in = Decoupled(new Grant).flip
val out = Decoupled(new Grant)
val que = Decoupled(UInt(width = tlBeatAddrBits)).flip
}
val count_valid = RegInit(Bool(false))
val multi_op = Reg(Bool())
val count_bits = Reg(UInt(width = tlBeatAddrBits))
val last = count_bits === UInt(0)
val in_put = io.in.bits.isBuiltInType(Grant.putAckType)
val in_get = io.in.bits.isBuiltInType(Grant.getDataBeatType)
val deliver = last || in_get
// Accept the input, discarding the non-final put grant
io.in.ready := count_valid && (io.out.ready || !deliver)
// Output the grant whenever we want delivery
io.out.valid := count_valid && io.in.valid && deliver
// Take a new number whenever we deliver the last beat
io.que.ready := !count_valid || (io.in.valid && io.out.ready && last)
// Desired flow control results:
assert (!io.out.fire() || (count_valid && io.in.fire())) // 1. out.fire => in.fire && count_valid
assert (!(io.in.fire() && deliver) || io.out.fire()) // 2. in.fire && deliver => out.fire
assert (!(io.out.fire() && last) || io.que.ready) // 3. out.fire && last => que.ready
assert (!io.que.fire() || (!count_valid || io.out.fire())) // 4. que.fire => !count_valid || (out.fire && last)
// Proofs:
// 1. out.fire => out.ready && (count_valid && in.valid && deliver) => (count_valid && out.ready) && in.valid => in.fire
// 2. in.fire && deliver => in.valid && count_valid && [(out.ready || !deliver) && deliver] =>
// in.valid && count_valid && deliver && out.ready => out.fire
// 3. out.fire && last => out.valid && out.ready && last => in.valid && out.ready && last => que.ready
// 4. que.fire => que.valid && (!count_valid || (in.valid && out.ready && last))
// => !count_valid || (count_valid && in.valid && out.ready && [last => deliver])
// => !count_valid || (out.valid && out.ready && last)
when (io.que.fire()) {
// Theorem 4 makes this safe; we overwrite garbage or last output
count_valid := Bool(true)
count_bits := io.que.bits
multi_op := io.que.bits =/= UInt(0)
} .elsewhen (io.in.fire()) {
count_valid := !last // false => !que.valid
count_bits := count_bits - UInt(1)
// Proof: in.fire && [last => deliver] =2=> out.fire && last =3=> que.ready
// !que.fire && que.ready => !que.valid
}
// Safe by Theorem 1
io.out.bits := io.in.bits
io.out.bits.g_type := Mux(multi_op, Mux(in_get, Grant.getDataBlockType, Grant.putAckType), io.in.bits.g_type)
}
class TileLinkFragmenter(depth: Int = 1)(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val in = new ClientUncachedTileLinkIO().flip
val out = new ClientUncachedTileLinkIO
}
// TL2:
// supportsAcquire = false
// modify all outward managers to supportsMultibeat = true
// assert: all managers must behave FIFO (not inspect duplicated id field)
val source = Module(new TileLinkFragmenterSource)
val sink = Module(new TileLinkFragmenterSink)
sink.io.que <> Queue(source.io.que, depth)
source.io.in <> io.in.acquire
io.out.acquire <> source.io.out
sink.io.in <> io.out.grant
io.in.grant <> sink.io.out
}
object TileLinkFragmenter {
// Pass the source/client to fragment
def apply(source: ClientUncachedTileLinkIO, depth: Int = 1)(implicit p: Parameters): ClientUncachedTileLinkIO = {
val fragmenter = Module(new TileLinkFragmenter(depth))
fragmenter.io.in <> source
fragmenter.io.out
}
}
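// A minimal usage sketch (illustrative; `masterPort` is a hypothetical
// ClientUncachedTileLinkIO): single-beat slaves such as BRAMSlave below assume
// a fragmenter in front of them, so wrap their port with
//
//   bram.io <> TileLinkFragmenter(masterPort)
//
// which splits each GetBlock/PutBlock into tlDataBeats single-beat requests and
// reassembles the grants on the way back.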

View File

@ -0,0 +1,161 @@
package uncore.devices
import Chisel._
import cde.{Parameters, Field}
import junctions._
import uncore.tilelink._
import uncore.util._
import HastiConstants._
class BRAMSlave(depth: Int)(implicit val p: Parameters) extends Module
with HasTileLinkParameters {
val io = new ClientUncachedTileLinkIO().flip
// For TL2:
// supportsAcquire = false
// supportsMultibeat = false
// supportsHint = false
// supportsAtomic = false
// Timing-wise, we assume the input is coming out of registers
// since you probably needed a TileLinkFragmenter in front of us
// Thus, only one pipeline stage: the grant result
val g_valid = RegInit(Bool(false))
val g_bits = Reg(new Grant)
// Just pass the pipeline straight through
io.grant.valid := g_valid
io.grant.bits := g_bits
io.acquire.ready := !g_valid || io.grant.ready
val acq_get = io.acquire.bits.isBuiltInType(Acquire.getType)
val acq_put = io.acquire.bits.isBuiltInType(Acquire.putType)
val acq_addr = Cat(io.acquire.bits.addr_block, io.acquire.bits.addr_beat)
val bram = Mem(depth, Bits(width = tlDataBits))
val ren = acq_get && io.acquire.fire()
val wen = acq_put && io.acquire.fire()
when (io.grant.fire()) {
g_valid := Bool(false)
}
when (io.acquire.fire()) {
g_valid := Bool(true)
g_bits := Grant(
is_builtin_type = Bool(true),
g_type = io.acquire.bits.getBuiltInGrantType(),
client_xact_id = io.acquire.bits.client_xact_id,
manager_xact_id = UInt(0),
addr_beat = io.acquire.bits.addr_beat,
data = UInt(0))
}
when (wen) {
bram.write(acq_addr, io.acquire.bits.data)
assert(io.acquire.bits.wmask().andR, "BRAMSlave: partial write masks not supported")
}
io.grant.bits.data := RegEnable(bram.read(acq_addr), ren)
}
class HastiRAM(depth: Int)(implicit p: Parameters) extends HastiModule()(p) {
val io = new HastiSlaveIO
val wdata = Vec.tabulate(hastiDataBytes)(i => io.hwdata(8*(i+1)-1,8*i))
val waddr = Reg(UInt(width = hastiAddrBits))
val wvalid = Reg(init = Bool(false))
val wsize = Reg(UInt(width = SZ_HSIZE))
val ram = SeqMem(depth, Vec(hastiDataBytes, Bits(width = 8)))
val max_size = log2Ceil(hastiDataBytes)
val wmask_lut = MuxLookup(wsize, SInt(-1, hastiDataBytes).asUInt,
(0 until max_size).map(sz => (UInt(sz) -> UInt((1 << (1 << sz)) - 1))))
val wmask = (wmask_lut << waddr(max_size - 1, 0))(hastiDataBytes - 1, 0)
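// Worked example (illustrative): with hastiDataBytes = 4 (a 32-bit bus),
// wsize = 1 selects the halfword entry of wmask_lut (0b0011); a write with
// waddr(1,0) = 2 then shifts it to wmask = 0b1100, enabling the upper two
// byte lanes of the SeqMem row.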
val is_trans = io.hsel && io.htrans.isOneOf(HTRANS_NONSEQ, HTRANS_SEQ)
val raddr = io.haddr >> UInt(max_size)
val ren = is_trans && !io.hwrite
val bypass = Reg(init = Bool(false))
when (is_trans && io.hwrite) {
waddr := io.haddr
wsize := io.hsize
wvalid := Bool(true)
} .otherwise { wvalid := Bool(false) }
when (ren) { bypass := wvalid && (waddr >> UInt(max_size)) === raddr }
when (wvalid) {
ram.write(waddr >> UInt(max_size), wdata, wmask.toBools)
}
val rdata = ram.read(raddr, ren)
io.hrdata := Cat(rdata.zip(wmask.toBools).zip(wdata).map {
case ((rbyte, wsel), wbyte) => Mux(wsel && bypass, wbyte, rbyte)
}.reverse)
io.hready := Bool(true)
io.hresp := HRESP_OKAY
}
/**
* This RAM is not meant to be particularly performant.
* It just supports the entire range of uncached TileLink operations in the
* simplest way possible.
*/
class TileLinkTestRAM(depth: Int)(implicit val p: Parameters) extends Module
with HasTileLinkParameters {
val io = new ClientUncachedTileLinkIO().flip
val ram = Mem(depth, UInt(width = tlDataBits))
val responding = Reg(init = Bool(false))
val acq = io.acquire.bits
val r_acq = Reg(io.acquire.bits)
val acq_addr = Cat(acq.addr_block, acq.addr_beat)
val r_acq_addr = Cat(r_acq.addr_block, r_acq.addr_beat)
when (io.acquire.fire() && io.acquire.bits.last()) {
r_acq := io.acquire.bits
responding := Bool(true)
}
when (io.grant.fire()) {
val is_getblk = r_acq.isBuiltInType(Acquire.getBlockType)
val last_beat = r_acq.addr_beat === UInt(tlDataBeats - 1)
when (is_getblk && !last_beat) {
r_acq.addr_beat := r_acq.addr_beat + UInt(1)
} .otherwise { responding := Bool(false) }
}
val old_data = ram(acq_addr)
val new_data = acq.data
val r_old_data = RegEnable(old_data, io.acquire.fire())
io.acquire.ready := !responding
io.grant.valid := responding
io.grant.bits := Grant(
is_builtin_type = Bool(true),
g_type = r_acq.getBuiltInGrantType(),
client_xact_id = r_acq.client_xact_id,
manager_xact_id = UInt(0),
addr_beat = r_acq.addr_beat,
data = Mux(r_acq.isAtomic(), r_old_data, ram(r_acq_addr)))
val amo_shift_bits = acq.amo_shift_bytes() << UInt(3)
val amoalu = Module(new AMOALU(amoAluOperandBits, rhsIsAligned = true))
amoalu.io.addr := Cat(acq.addr_block, acq.addr_beat, acq.addr_byte())
amoalu.io.cmd := acq.op_code()
amoalu.io.typ := acq.op_size()
amoalu.io.lhs := old_data >> amo_shift_bits
amoalu.io.rhs := new_data >> amo_shift_bits
val result = Mux(acq.isAtomic(), amoalu.io.out << amo_shift_bits, new_data)
val wmask = FillInterleaved(8, acq.wmask())
when (io.acquire.fire() && acq.hasData()) {
ram(acq_addr) := (old_data & ~wmask) | (result & wmask)
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,187 @@
// See LICENSE for license details.
package uncore.devices
import Chisel._
import Chisel.ImplicitConversions._
import junctions._
import uncore.tilelink._
import cde.Parameters
class GatewayPLICIO extends Bundle {
val valid = Bool(OUTPUT)
val ready = Bool(INPUT)
val complete = Bool(INPUT)
}
class LevelGateway extends Module {
val io = new Bundle {
val interrupt = Bool(INPUT)
val plic = new GatewayPLICIO
}
val inFlight = Reg(init=Bool(false))
when (io.interrupt && io.plic.ready) { inFlight := true }
when (io.plic.complete) { inFlight := false }
io.plic.valid := io.interrupt && !inFlight
}
case class PLICConfig(nHartsIn: Int, supervisor: Boolean, nDevices: Int, nPriorities: Int) {
def contextsPerHart = if (supervisor) 2 else 1
def nHarts = contextsPerHart * nHartsIn
def context(i: Int, mode: Char) = mode match {
case 'M' => i * contextsPerHart
case 'S' => require(supervisor); i * contextsPerHart + 1
}
def claimAddr(i: Int, mode: Char) = hartBase + hartOffset(context(i, mode)) + claimOffset
def threshAddr(i: Int, mode: Char) = hartBase + hartOffset(context(i, mode))
def enableAddr(i: Int, mode: Char) = enableBase + enableOffset(context(i, mode))
def size = hartBase + hartOffset(maxHarts)
def maxDevices = 1023
def maxHarts = 15872
def pendingBase = 0x1000
def enableBase = 0x2000
def hartBase = 0x200000
require(hartBase >= enableBase + enableOffset(maxHarts))
def enableOffset(i: Int) = i * ((maxDevices+7)/8)
def hartOffset(i: Int) = i * 0x1000
def claimOffset = 4
def priorityBytes = 4
require(nDevices > 0 && nDevices <= maxDevices)
require(nHarts > 0 && nHarts <= maxHarts)
require(nPriorities >= 0 && nPriorities <= nDevices)
}
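// Worked example (hypothetical config): PLICConfig(nHartsIn = 2, supervisor = true,
// nDevices = 4, nPriorities = 7) gives contextsPerHart = 2 and nHarts = 4.
// Context indices interleave M/S per hart: context(1, 'M') = 2 and context(1, 'S') = 3,
// so enableAddr(1, 'M') = 0x2000 + 2 * 128 = 0x2100 and
// claimAddr(1, 'S') = 0x200000 + 3 * 0x1000 + 4 = 0x203004.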
/** Platform-Level Interrupt Controller */
class PLIC(val cfg: PLICConfig)(implicit val p: Parameters) extends Module
with HasTileLinkParameters
with HasAddrMapParameters {
val io = new Bundle {
val devices = Vec(cfg.nDevices, new GatewayPLICIO).flip
val harts = Vec(cfg.nHarts, Bool()).asOutput
val tl = new ClientUncachedTileLinkIO().flip
}
val priority =
if (cfg.nPriorities > 0) Reg(Vec(cfg.nDevices+1, UInt(width=log2Up(cfg.nPriorities+1))))
else Wire(init=Vec.fill(cfg.nDevices+1)(UInt(1)))
val threshold =
if (cfg.nPriorities > 0) Reg(Vec(cfg.nHarts, UInt(width = log2Up(cfg.nPriorities+1))))
else Wire(init=Vec.fill(cfg.nHarts)(UInt(0)))
val pending = Reg(init=Vec.fill(cfg.nDevices+1){Bool(false)})
val enables = Reg(Vec(cfg.nHarts, Vec(cfg.nDevices+1, Bool())))
for ((p, g) <- pending.tail zip io.devices) {
g.ready := !p
g.complete := false
when (g.valid) { p := true }
}
def findMax(x: Seq[UInt]): (UInt, UInt) = {
if (x.length > 1) {
val half = 1 << (log2Ceil(x.length) - 1)
val lMax = findMax(x take half)
val rMax = findMax(x drop half)
val useLeft = lMax._1 >= rMax._1
(Mux(useLeft, lMax._1, rMax._1), Mux(useLeft, lMax._2, UInt(half) + rMax._2))
} else (x.head, UInt(0))
}
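// Illustrative evaluation (hypothetical inputs): findMax over the values
// Seq(3, 7, 5, 7) builds a log-depth tournament and yields (value, index) = (7, 1);
// ties resolve to the left because useLeft tests lMax._1 >= rMax._1.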
val maxDevs = Wire(Vec(cfg.nHarts, UInt(width = log2Up(pending.size))))
for (hart <- 0 until cfg.nHarts) {
val effectivePriority =
for (((p, en), pri) <- (pending zip enables(hart) zip priority).tail)
yield Cat(p && en, pri)
val (maxPri, maxDev) = findMax((UInt(1) << priority(0).getWidth) +: effectivePriority)
maxDevs(hart) := Reg(next = maxDev)
io.harts(hart) := Reg(next = maxPri) > Cat(UInt(1), threshold(hart))
}
val acq = Queue(io.tl.acquire, 1)
val read = acq.fire() && acq.bits.isBuiltInType(Acquire.getType)
val write = acq.fire() && acq.bits.isBuiltInType(Acquire.putType)
assert(!acq.fire() || read || write, "unsupported PLIC operation")
val addr = acq.bits.full_addr()(log2Up(cfg.size)-1,0)
val claimant =
if (cfg.nHarts == 1) UInt(0)
else (addr - cfg.hartBase)(log2Up(cfg.hartOffset(cfg.nHarts))-1,log2Up(cfg.hartOffset(1)))
val hart = Wire(init = claimant)
val myMaxDev = maxDevs(claimant)
val myEnables = enables(hart)
val rdata = Wire(init = UInt(0, tlDataBits))
val masked_wdata = (acq.bits.data & acq.bits.full_wmask()) | (rdata & ~acq.bits.full_wmask())
when (addr >= cfg.hartBase) {
val word =
if (tlDataBytes > cfg.claimOffset) UInt(0)
else addr(log2Up(cfg.claimOffset),log2Up(tlDataBytes))
rdata := Cat(myMaxDev, UInt(0, 8*cfg.priorityBytes-threshold(0).getWidth), threshold(claimant)) >> (word * tlDataBits)
when (read && addr(log2Ceil(cfg.claimOffset))) {
pending(myMaxDev) := false
}
when (write) {
when (if (tlDataBytes > cfg.claimOffset) acq.bits.wmask()(cfg.claimOffset) else addr(log2Ceil(cfg.claimOffset))) {
val dev = (acq.bits.data >> ((8 * cfg.claimOffset) % tlDataBits))(log2Up(pending.size)-1,0)
when (myEnables(dev)) { io.devices(dev-1).complete := true }
}.otherwise {
if (cfg.nPriorities > 0) threshold(claimant) := acq.bits.data
}
}
}.elsewhen (addr >= cfg.enableBase) {
val enableHart =
if (cfg.nHarts > 1) (addr - cfg.enableBase)(log2Up(cfg.enableOffset(cfg.nHarts))-1,log2Up(cfg.enableOffset(1)))
else UInt(0)
hart := enableHart
val word =
if (tlDataBits >= cfg.nHarts) UInt(0)
else addr(log2Up((cfg.nHarts+7)/8)-1,log2Up(tlDataBytes))
for (i <- 0 until cfg.nHarts by tlDataBits) {
when (word === i/tlDataBits) {
rdata := Cat(myEnables.slice(i, i + tlDataBits).reverse)
for (j <- 0 until (tlDataBits min (myEnables.size - i))) {
when (write) { enables(enableHart)(i+j) := masked_wdata(j) }
}
}
}
}.elsewhen (addr >= cfg.pendingBase) {
val word =
if (tlDataBytes >= pending.size) UInt(0)
else addr(log2Up(pending.size)-1,log2Up(tlDataBytes))
rdata := pending.asUInt >> (word * tlDataBits)
}.otherwise {
val regsPerBeat = tlDataBytes >> log2Up(cfg.priorityBytes)
val word =
if (regsPerBeat >= priority.size) UInt(0)
else addr(log2Up(priority.size*cfg.priorityBytes)-1,log2Up(tlDataBytes))
for (i <- 0 until priority.size by regsPerBeat) {
when (word === i/regsPerBeat) {
rdata := Cat(priority.slice(i, i + regsPerBeat).map(p => Cat(UInt(0, 8*cfg.priorityBytes-p.getWidth), p)).reverse)
for (j <- 0 until (regsPerBeat min (priority.size - i))) {
if (cfg.nPriorities > 0) when (write) { priority(i+j) := masked_wdata >> (j * 8 * cfg.priorityBytes) }
}
}
}
}
priority(0) := 0
pending(0) := false
for (e <- enables)
e(0) := false
io.tl.grant.valid := acq.valid
acq.ready := io.tl.grant.ready
io.tl.grant.bits := Grant(
is_builtin_type = Bool(true),
g_type = acq.bits.getBuiltInGrantType(),
client_xact_id = acq.bits.client_xact_id,
manager_xact_id = UInt(0),
addr_beat = UInt(0),
data = rdata)
}

View File

@ -0,0 +1,127 @@
// See LICENSE for license details.
package uncore.devices
import Chisel._
import Chisel.ImplicitConversions._
import junctions._
import junctions.NastiConstants._
import uncore.tilelink._
import cde.{Parameters, Field}
/** Number of tiles */
case object NTiles extends Field[Int]
class PRCIInterrupts extends Bundle {
val meip = Bool()
val seip = Bool()
val debug = Bool()
}
class PRCITileIO(implicit p: Parameters) extends Bundle {
val reset = Bool(OUTPUT)
val id = UInt(OUTPUT, log2Up(p(NTiles)))
val interrupts = new PRCIInterrupts {
val mtip = Bool()
val msip = Bool()
}.asOutput
override def cloneType: this.type = new PRCITileIO().asInstanceOf[this.type]
}
object PRCI {
def msip(hart: Int) = hart * msipBytes
def timecmp(hart: Int) = 0x4000 + hart * timecmpBytes
def time = 0xbff8
def msipBytes = 4
def timecmpBytes = 8
def size = 0xc000
}
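// Worked register-map example: msip(1) = 0x4, timecmp(2) = 0x4000 + 2 * 8 = 0x4010,
// and time sits at 0xbff8, all within size = 0xc000.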
/** Power, Reset, Clock, Interrupt */
class PRCI(implicit val p: Parameters) extends Module
with HasTileLinkParameters
with HasAddrMapParameters {
val io = new Bundle {
val interrupts = Vec(p(NTiles), new PRCIInterrupts).asInput
val tl = new ClientUncachedTileLinkIO().flip
val tiles = Vec(p(NTiles), new PRCITileIO)
val rtcTick = Bool(INPUT)
}
val timeWidth = 64
val timecmp = Reg(Vec(p(NTiles), UInt(width = timeWidth)))
val time = Reg(init=UInt(0, timeWidth))
when (io.rtcTick) { time := time + UInt(1) }
val ipi = Reg(init=Vec.fill(p(NTiles))(UInt(0, 32)))
val acq = Queue(io.tl.acquire, 1)
val addr = acq.bits.full_addr()(log2Ceil(PRCI.size)-1,0)
val read = acq.bits.isBuiltInType(Acquire.getType)
val rdata = Wire(init=UInt(0))
io.tl.grant.valid := acq.valid
acq.ready := io.tl.grant.ready
io.tl.grant.bits := Grant(
is_builtin_type = Bool(true),
g_type = acq.bits.getBuiltInGrantType(),
client_xact_id = acq.bits.client_xact_id,
manager_xact_id = UInt(0),
addr_beat = UInt(0),
data = rdata)
when (addr(log2Floor(PRCI.time))) {
require(log2Floor(PRCI.timecmp(p(NTiles)-1)) < log2Floor(PRCI.time))
rdata := load(Vec(time + UInt(0)), acq.bits)
}.elsewhen (addr >= PRCI.timecmp(0)) {
rdata := store(timecmp, acq.bits)
}.otherwise {
rdata := store(ipi, acq.bits) & Fill(tlDataBits/32, UInt(1, 32))
}
for ((tile, i) <- io.tiles zipWithIndex) {
tile.interrupts := io.interrupts(i)
tile.interrupts.msip := ipi(i)(0)
tile.interrupts.mtip := time >= timecmp(i)
tile.id := UInt(i)
}
// TODO generalize these to help other TL slaves
def load(v: Vec[UInt], acq: Acquire): UInt = {
val w = v.head.getWidth
val a = acq.full_addr()
require(isPow2(w) && w >= 8)
if (w > tlDataBits) {
(v(a(log2Up(w/8*v.size)-1,log2Up(w/8))) >> a(log2Up(w/8)-1,log2Up(tlDataBytes)))(tlDataBits-1,0)
} else {
val row = for (i <- 0 until v.size by tlDataBits/w)
yield Cat(v.slice(i, i + tlDataBits/w).reverse)
if (row.size == 1) row.head
else Vec(row)(a(log2Up(w/8*v.size)-1,log2Up(tlDataBytes)))
}
}
def store(v: Vec[UInt], acq: Acquire): UInt = {
val w = v.head.getWidth
require(isPow2(w) && w >= 8)
val a = acq.full_addr()
val rdata = load(v, acq)
val wdata = (acq.data & acq.full_wmask()) | (rdata & ~acq.full_wmask())
if (w <= tlDataBits) {
val word =
if (tlDataBits/w >= v.size) UInt(0)
else a(log2Up(w/8*v.size)-1,log2Up(tlDataBytes))
for (i <- 0 until v.size) {
when (acq.isBuiltInType(Acquire.putType) && word === i/(tlDataBits/w)) {
val base = i % (tlDataBits/w)
v(i) := wdata >> (w * (i % (tlDataBits/w)))
}
}
} else {
val i = a(log2Up(w/8*v.size)-1,log2Up(w/8))
val mask = FillInterleaved(tlDataBits, UIntToOH(a(log2Up(w/8)-1,log2Up(tlDataBytes))))
v(i) := (wdata & mask) | (v(i) & ~mask)
}
rdata
}
}

View File

@ -0,0 +1,66 @@
package uncore.devices
import Chisel._
import junctions._
import uncore.tilelink._
import uncore.util._
import cde.{Parameters, Field}
class ROMSlave(contents: Seq[Byte])(implicit val p: Parameters) extends Module
with HasTileLinkParameters
with HasAddrMapParameters {
val io = new ClientUncachedTileLinkIO().flip
val acq = Queue(io.acquire, 1)
val single_beat = acq.bits.isBuiltInType(Acquire.getType)
val multi_beat = acq.bits.isBuiltInType(Acquire.getBlockType)
assert(!acq.valid || single_beat || multi_beat, "unsupported ROMSlave operation")
val addr_beat = Reg(UInt())
when (io.grant.fire()) { addr_beat := addr_beat + UInt(1) }
when (io.acquire.fire()) { addr_beat := io.acquire.bits.addr_beat }
val byteWidth = tlDataBits / 8
val rows = (contents.size + byteWidth - 1)/byteWidth
val rom = Vec.tabulate(rows) { i =>
val slice = contents.slice(i*byteWidth, (i+1)*byteWidth)
UInt(slice.foldRight(BigInt(0)) { case (x,y) => (y << 8) + (x.toInt & 0xFF) }, byteWidth*8)
}
val raddr = Cat(acq.bits.addr_block, addr_beat)
val rdata = rom(if (rows == 1) UInt(0) else raddr(log2Up(rom.size)-1,0))
val last = !multi_beat || addr_beat === UInt(tlDataBeats-1)
io.grant.valid := acq.valid
acq.ready := io.grant.ready && last
io.grant.bits := Grant(
is_builtin_type = Bool(true),
g_type = acq.bits.getBuiltInGrantType(),
client_xact_id = acq.bits.client_xact_id,
manager_xact_id = UInt(0),
addr_beat = addr_beat,
data = rdata)
}
class NastiROM(contents: Seq[Byte])(implicit p: Parameters) extends Module {
val io = new NastiIO().flip
val ar = Queue(io.ar, 1)
// This assumes ROMs are in read-only parts of the address map.
// Reuse b_queue code from NastiErrorSlave if this assumption is bad.
when (ar.valid) { assert(ar.bits.len === UInt(0), "Can't burst-read from NastiROM") }
assert(!(io.aw.valid || io.w.valid), "Can't write to NastiROM")
io.aw.ready := Bool(false)
io.w.ready := Bool(false)
io.b.valid := Bool(false)
val byteWidth = io.r.bits.nastiXDataBits / 8
val rows = (contents.size + byteWidth - 1)/byteWidth
val rom = Vec.tabulate(rows) { i =>
val slice = contents.slice(i*byteWidth, (i+1)*byteWidth)
UInt(slice.foldRight(BigInt(0)) { case (x,y) => (y << 8) + (x.toInt & 0xFF) }, byteWidth*8)
}
val rdata = rom(if (rows == 1) UInt(0) else ar.bits.addr(log2Up(contents.size)-1,log2Up(byteWidth)))
io.r <> ar
io.r.bits := NastiReadDataChannel(ar.bits.id, rdata)
}

View File

@ -0,0 +1,196 @@
package uncore.tilelink
import Chisel._
import junctions._
import cde.{Parameters, Field}
/** Utility functions for constructing TileLinkIO arbiters */
trait TileLinkArbiterLike extends HasTileLinkParameters {
// Some shorthand type variables
type ManagerSourcedWithId = ManagerToClientChannel with HasClientTransactionId
type ClientSourcedWithId = ClientToManagerChannel with HasClientTransactionId
type ClientSourcedWithIdAndData = ClientToManagerChannel with HasClientTransactionId with HasTileLinkData
val arbN: Int // The number of ports on the client side
// These abstract funcs are filled in depending on whether the arbiter mucks with the
// outgoing client ids to track sourcing and then needs to revert them on the way back
def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int): Bits
def managerSourcedClientXactId(in: ManagerSourcedWithId): Bits
def arbIdx(in: ManagerSourcedWithId): UInt
// The following functions are all wiring helpers for each of the different types of TileLink channels
def hookupClientSource[M <: ClientSourcedWithIdAndData](
clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
mngr: DecoupledIO[LogicalNetworkIO[M]]) {
def hasData(m: LogicalNetworkIO[M]) = m.payload.hasMultibeatData()
val arb = Module(new LockingRRArbiter(mngr.bits, arbN, tlDataBeats, Some(hasData _)))
clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => {
arb.valid := req.valid
arb.bits := req.bits
arb.bits.payload.client_xact_id := clientSourcedClientXactId(req.bits.payload, id)
req.ready := arb.ready
}}
mngr <> arb.io.out
}
def hookupClientSourceHeaderless[M <: ClientSourcedWithIdAndData](
clts: Seq[DecoupledIO[M]],
mngr: DecoupledIO[M]) {
def hasData(m: M) = m.hasMultibeatData()
val arb = Module(new LockingRRArbiter(mngr.bits, arbN, tlDataBeats, Some(hasData _)))
clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => {
arb.valid := req.valid
arb.bits := req.bits
arb.bits.client_xact_id := clientSourcedClientXactId(req.bits, id)
req.ready := arb.ready
}}
mngr <> arb.io.out
}
def hookupManagerSourceWithHeader[M <: ManagerToClientChannel](
clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
mngr: DecoupledIO[LogicalNetworkIO[M]]) {
mngr.ready := Bool(false)
for (i <- 0 until arbN) {
clts(i).valid := Bool(false)
when (mngr.bits.header.dst === UInt(i)) {
clts(i).valid := mngr.valid
mngr.ready := clts(i).ready
}
clts(i).bits := mngr.bits
}
}
def hookupManagerSourceWithId[M <: ManagerSourcedWithId](
clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
mngr: DecoupledIO[LogicalNetworkIO[M]]) {
mngr.ready := Bool(false)
for (i <- 0 until arbN) {
clts(i).valid := Bool(false)
when (arbIdx(mngr.bits.payload) === UInt(i)) {
clts(i).valid := mngr.valid
mngr.ready := clts(i).ready
}
clts(i).bits := mngr.bits
clts(i).bits.payload.client_xact_id := managerSourcedClientXactId(mngr.bits.payload)
}
}
def hookupManagerSourceHeaderlessWithId[M <: ManagerSourcedWithId](
clts: Seq[DecoupledIO[M]],
mngr: DecoupledIO[M]) {
mngr.ready := Bool(false)
for (i <- 0 until arbN) {
clts(i).valid := Bool(false)
when (arbIdx(mngr.bits) === UInt(i)) {
clts(i).valid := mngr.valid
mngr.ready := clts(i).ready
}
clts(i).bits := mngr.bits
clts(i).bits.client_xact_id := managerSourcedClientXactId(mngr.bits)
}
}
def hookupManagerSourceBroadcast[M <: Data](clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) {
clts.map{ _.valid := mngr.valid }
clts.map{ _.bits := mngr.bits }
mngr.ready := clts.map(_.ready).reduce(_&&_)
}
def hookupFinish[M <: LogicalNetworkIO[Finish]]( clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) {
val arb = Module(new RRArbiter(mngr.bits, arbN))
arb.io.in <> clts
mngr <> arb.io.out
}
}
/** Abstract base case for any Arbiters that have UncachedTileLinkIOs */
abstract class UncachedTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module
with TileLinkArbiterLike {
val io = new Bundle {
val in = Vec(arbN, new UncachedTileLinkIO).flip
val out = new UncachedTileLinkIO
}
hookupClientSource(io.in.map(_.acquire), io.out.acquire)
hookupFinish(io.in.map(_.finish), io.out.finish)
hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant)
}
/** Abstract base case for any Arbiters that have cached TileLinkIOs */
abstract class TileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module
with TileLinkArbiterLike {
val io = new Bundle {
val in = Vec(arbN, new TileLinkIO).flip
val out = new TileLinkIO
}
hookupClientSource(io.in.map(_.acquire), io.out.acquire)
hookupClientSource(io.in.map(_.release), io.out.release)
hookupFinish(io.in.map(_.finish), io.out.finish)
hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe)
hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant)
}
/** Appends the port index of the arbiter to the client_xact_id */
trait AppendsArbiterId extends TileLinkArbiterLike {
def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) =
Cat(in.client_xact_id, UInt(id, log2Up(arbN)))
def managerSourcedClientXactId(in: ManagerSourcedWithId) = {
/* This shouldn't be necessary, but Chisel3 doesn't emit correct Verilog
* when right shifting by too many bits. See
* https://github.com/ucb-bar/firrtl/issues/69 */
if (in.client_xact_id.getWidth > log2Up(arbN))
in.client_xact_id >> log2Up(arbN)
else
UInt(0)
}
def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id(log2Up(arbN)-1,0)
}
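// Worked example (hypothetical widths): with arbN = 4, two id bits are appended.
// Client port 2 issuing client_xact_id = 5 sends Cat(5, 2) = 0b10110 = 22 upstream;
// on the grant path arbIdx recovers port 2 from the low bits and
// managerSourcedClientXactId restores 5 by shifting them off.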
/** Uses the client_xact_id as is (assumes it has been set to port index) */
trait PassesId extends TileLinkArbiterLike {
def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = in.client_xact_id
def managerSourcedClientXactId(in: ManagerSourcedWithId) = in.client_xact_id
def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id
}
/** Overwrites some default client_xact_id with the port idx */
trait UsesNewId extends TileLinkArbiterLike {
def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = UInt(id, log2Up(arbN))
def managerSourcedClientXactId(in: ManagerSourcedWithId) = UInt(0)
def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id
}
// Now we can mix in the various id-generation traits to make concrete arbiter classes
class UncachedTileLinkIOArbiterThatAppendsArbiterId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with AppendsArbiterId
class UncachedTileLinkIOArbiterThatPassesId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with PassesId
class UncachedTileLinkIOArbiterThatUsesNewId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with UsesNewId
class TileLinkIOArbiterThatAppendsArbiterId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with AppendsArbiterId
class TileLinkIOArbiterThatPassesId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with PassesId
class TileLinkIOArbiterThatUsesNewId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with UsesNewId
/** Concrete uncached client-side arbiter that appends the arbiter's port id to client_xact_id */
class ClientUncachedTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module with TileLinkArbiterLike with AppendsArbiterId {
val io = new Bundle {
val in = Vec(arbN, new ClientUncachedTileLinkIO).flip
val out = new ClientUncachedTileLinkIO
}
if (arbN > 1) {
hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire)
hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant)
} else { io.out <> io.in.head }
}
/** Concrete client-side arbiter that appends the arbiter's port id to client_xact_id */
class ClientTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module with TileLinkArbiterLike with AppendsArbiterId {
val io = new Bundle {
val in = Vec(arbN, new ClientTileLinkIO).flip
val out = new ClientTileLinkIO
}
if (arbN > 1) {
hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire)
hookupClientSourceHeaderless(io.in.map(_.release), io.out.release)
hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe)
hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant)
} else { io.out <> io.in.head }
}

View File

@ -0,0 +1,971 @@
// See LICENSE for license details.
package uncore.tilelink
import Chisel._
import junctions._
import uncore.coherence.CoherencePolicy
import uncore.util._
import scala.math.max
import uncore.constants._
import cde.{Parameters, Field}
case object CacheBlockOffsetBits extends Field[Int]
case object AmoAluOperandBits extends Field[Int]
case object TLId extends Field[String]
case class TLKey(id: String) extends Field[TileLinkParameters]
/** Parameters exposed to the top-level design, set based on
* external requirements or design space exploration
*
* Coherency policy used to define custom message types
* Number of manager agents
* Number of client agents that cache data and use custom [[uncore.Acquire]] types
* Number of client agents that do not cache data and use built-in [[uncore.Acquire]] types
* Maximum number of unique outstanding transactions per client
* Maximum number of clients multiplexed onto a single port
* Maximum number of unique outstanding transactions per manager
* Width of cache block addresses
* Total amount of data per cache block
* Number of data beats per cache block
**/
case class TileLinkParameters(
coherencePolicy: CoherencePolicy,
nManagers: Int,
nCachingClients: Int,
nCachelessClients: Int,
maxClientXacts: Int,
maxClientsPerPort: Int,
maxManagerXacts: Int,
dataBits: Int,
dataBeats: Int = 4,
overrideDataBitsPerBeat: Option[Int] = None
) {
val nClients = nCachingClients + nCachelessClients
val writeMaskBits: Int = ((dataBits / dataBeats) - 1) / 8 + 1
val dataBitsPerBeat: Int = overrideDataBitsPerBeat.getOrElse(dataBits / dataBeats)
}
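// Worked example (hypothetical parameters): dataBits = 512 with the default
// dataBeats = 4 yields dataBitsPerBeat = 128 and writeMaskBits = 16, i.e. one
// write-mask bit per byte of a beat; overrideDataBitsPerBeat replaces only the
// beat width, not the mask computation.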
/** Utility trait for building Modules and Bundles that use TileLink parameters */
trait HasTileLinkParameters {
implicit val p: Parameters
val tlExternal = p(TLKey(p(TLId)))
val tlCoh = tlExternal.coherencePolicy
val tlNManagers = tlExternal.nManagers
val tlNCachingClients = tlExternal.nCachingClients
val tlNCachelessClients = tlExternal.nCachelessClients
val tlNClients = tlExternal.nClients
val tlClientIdBits = log2Up(tlNClients)
val tlManagerIdBits = log2Up(tlNManagers)
val tlMaxClientXacts = tlExternal.maxClientXacts
val tlMaxClientsPerPort = tlExternal.maxClientsPerPort
val tlMaxManagerXacts = tlExternal.maxManagerXacts
val tlClientXactIdBits = log2Up(tlMaxClientXacts*tlMaxClientsPerPort)
val tlManagerXactIdBits = log2Up(tlMaxManagerXacts)
val tlBlockAddrBits = p(PAddrBits) - p(CacheBlockOffsetBits)
val tlDataBeats = tlExternal.dataBeats
val tlDataBits = tlExternal.dataBitsPerBeat
val tlDataBytes = tlDataBits/8
val tlWriteMaskBits = tlExternal.writeMaskBits
val tlBeatAddrBits = log2Up(tlDataBeats)
val tlByteAddrBits = log2Up(tlWriteMaskBits)
val tlMemoryOpcodeBits = M_SZ
val tlMemoryOperandSizeBits = log2Ceil(log2Ceil(tlWriteMaskBits) + 1)
val tlAcquireTypeBits = max(log2Up(Acquire.nBuiltInTypes),
tlCoh.acquireTypeWidth)
val tlAcquireUnionBits = max(tlWriteMaskBits,
(tlByteAddrBits +
tlMemoryOperandSizeBits +
tlMemoryOpcodeBits)) + 1
val tlGrantTypeBits = max(log2Up(Grant.nBuiltInTypes),
tlCoh.grantTypeWidth) + 1
/** Whether the underlying physical network preserves point-to-point ordering of messages */
val tlNetworkPreservesPointToPointOrdering = false
val tlNetworkDoesNotInterleaveBeats = true
val amoAluOperandBits = p(AmoAluOperandBits)
val amoAluOperandBytes = amoAluOperandBits/8
}
abstract class TLModule(implicit val p: Parameters) extends Module
with HasTileLinkParameters
abstract class TLBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p)
with HasTileLinkParameters
/** Base trait for all TileLink channels */
abstract class TileLinkChannel(implicit p: Parameters) extends TLBundle()(p) {
def hasData(dummy: Int = 0): Bool
def hasMultibeatData(dummy: Int = 0): Bool
}
/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
abstract class ClientToManagerChannel(implicit p: Parameters) extends TileLinkChannel()(p)
/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
abstract class ManagerToClientChannel(implicit p: Parameters) extends TileLinkChannel()(p)
/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
abstract class ClientToClientChannel(implicit p: Parameters) extends TileLinkChannel()(p) // Unused for now
/** Common signals that are used in multiple channels.
* These traits are useful for type parameterizing bundle wiring functions.
*/
/** Address of a cache block. */
trait HasCacheBlockAddress extends HasTileLinkParameters {
val addr_block = UInt(width = tlBlockAddrBits)
def conflicts(that: HasCacheBlockAddress) = this.addr_block === that.addr_block
def conflicts(addr: UInt) = this.addr_block === addr
}
/** Sub-block address or beat id of multi-beat data */
trait HasTileLinkBeatId extends HasTileLinkParameters {
val addr_beat = UInt(width = tlBeatAddrBits)
}
/* Client-side transaction id. Usually Miss Status Handling Register File index */
trait HasClientTransactionId extends HasTileLinkParameters {
val client_xact_id = Bits(width = tlClientXactIdBits)
}
/** Manager-side transaction id. Usually Transaction Status Handling Register File index. */
trait HasManagerTransactionId extends HasTileLinkParameters {
val manager_xact_id = Bits(width = tlManagerXactIdBits)
}
/** A single beat of cache block data */
trait HasTileLinkData extends HasTileLinkBeatId {
val data = UInt(width = tlDataBits)
def hasData(dummy: Int = 0): Bool
def hasMultibeatData(dummy: Int = 0): Bool
def first(dummy: Int = 0): Bool = !hasMultibeatData() || addr_beat === UInt(0)
def last(dummy: Int = 0): Bool = !hasMultibeatData() || addr_beat === UInt(tlDataBeats-1)
}
/** An entire cache block of data */
trait HasTileLinkBlock extends HasTileLinkParameters {
val data_buffer = Vec(tlDataBeats, UInt(width = tlDataBits))
val wmask_buffer = Vec(tlDataBeats, UInt(width = tlWriteMaskBits))
}
/** The id of a client source or destination. Used in managers. */
trait HasClientId extends HasTileLinkParameters {
val client_id = UInt(width = tlClientIdBits)
}
trait HasManagerId extends HasTileLinkParameters {
val manager_id = UInt(width = tlManagerIdBits)
}
trait HasAcquireUnion extends HasTileLinkParameters {
val union = Bits(width = tlAcquireUnionBits)
// Utility funcs for accessing subblock union:
def isBuiltInType(t: UInt): Bool
val opCodeOff = 1
val opSizeOff = tlMemoryOpcodeBits + opCodeOff
val addrByteOff = tlMemoryOperandSizeBits + opSizeOff
val addrByteMSB = tlByteAddrBits + addrByteOff
/** Hint whether to allocate the block in any intervening caches */
def allocate(dummy: Int = 0) = union(0)
/** Op code for [[uncore.PutAtomic]] operations */
def op_code(dummy: Int = 0) = Mux(
isBuiltInType(Acquire.putType) || isBuiltInType(Acquire.putBlockType),
M_XWR, union(opSizeOff-1, opCodeOff))
/** Operand size for [[uncore.PutAtomic]] */
def op_size(dummy: Int = 0) = union(addrByteOff-1, opSizeOff)
/** Byte address for [[uncore.PutAtomic]] operand */
def addr_byte(dummy: Int = 0) = union(addrByteMSB-1, addrByteOff)
def amo_offset(dummy: Int = 0) =
if (tlByteAddrBits > log2Up(amoAluOperandBytes)) addr_byte()(tlByteAddrBits-1, log2Up(amoAluOperandBytes))
else UInt(0)
/** Bit offset of [[uncore.PutAtomic]] operand */
def amo_shift_bytes(dummy: Int = 0) = UInt(amoAluOperandBytes)*amo_offset()
/** Write mask for [[uncore.Put]], [[uncore.PutBlock]], [[uncore.PutAtomic]] */
def wmask(dummy: Int = 0): UInt = {
val is_amo = isBuiltInType(Acquire.putAtomicType)
val amo_mask = if (tlByteAddrBits > log2Up(amoAluOperandBytes))
FillInterleaved(amoAluOperandBytes, UIntToOH(amo_offset()))
else Acquire.fullWriteMask
val is_put = isBuiltInType(Acquire.putBlockType) || isBuiltInType(Acquire.putType)
val put_mask = union(tlWriteMaskBits, 1)
Mux(is_amo, amo_mask, Mux(is_put, put_mask, UInt(0)))
}
/** Full, beat-sized writemask */
def full_wmask(dummy: Int = 0) = FillInterleaved(8, wmask())
  /** Does this message have a partial writemask? */
def hasPartialWritemask(dummy: Int = 0): Bool = wmask() =/= Acquire.fullWriteMask
}
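/* Worked example (illustrative, not part of this commit): on a link with
 * 64-bit beats and an 8-byte writemask, so tlByteAddrBits = 3,
 * tlMemoryOperandSizeBits = 2 and (assuming) tlMemoryOpcodeBits = 5, the
 * union field of a PutAtomic packs, from the LSB up:
 *   union(0)     = alloc
 *   union(5, 1)  = op_code   (opCodeOff = 1,  opSizeOff = 6)
 *   union(7, 6)  = op_size   (addrByteOff = 8)
 *   union(10, 8) = addr_byte (addrByteMSB = 11)
 */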
trait HasAcquireType extends HasTileLinkParameters {
val is_builtin_type = Bool()
val a_type = UInt(width = tlAcquireTypeBits)
/** Message type equality */
def is(t: UInt) = a_type === t //TODO: make this more opaque; def ===?
/** Is this message a built-in or custom type */
def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
/** Is this message a particular built-in type */
def isBuiltInType(t: UInt): Bool = is_builtin_type && a_type === t
/** Does this message refer to subblock operands using info in the Acquire.union subbundle */
def isSubBlockType(dummy: Int = 0): Bool = isBuiltInType() && a_type.isOneOf(Acquire.typesOnSubBlocks)
/** Is this message a built-in prefetch message */
def isPrefetch(dummy: Int = 0): Bool = isBuiltInType() &&
(is(Acquire.getPrefetchType) || is(Acquire.putPrefetchType))
/** Is this message a built-in atomic message */
def isAtomic(dummy: Int = 0): Bool = isBuiltInType() && is(Acquire.putAtomicType)
/** Is this message a built-in read message */
def isGet(dummy: Int = 0): Bool = isBuiltInType() && (is(Acquire.getType) || is(Acquire.getBlockType))
/** Does this message contain data? Assumes that no custom message types have data. */
def hasData(dummy: Int = 0): Bool = isBuiltInType() && a_type.isOneOf(Acquire.typesWithData)
/** Does this message contain multiple beats of data? Assumes that no custom message types have data. */
def hasMultibeatData(dummy: Int = 0): Bool = Bool(tlDataBeats > 1) && isBuiltInType() &&
a_type.isOneOf(Acquire.typesWithMultibeatData)
/** Mapping between each built-in Acquire type and a built-in Grant type. */
def getBuiltInGrantType(dummy: Int = 0): UInt = Acquire.getBuiltInGrantType(this.a_type)
}
trait HasProbeType extends HasTileLinkParameters {
val p_type = UInt(width = tlCoh.probeTypeWidth)
def is(t: UInt) = p_type === t
def hasData(dummy: Int = 0) = Bool(false)
def hasMultibeatData(dummy: Int = 0) = Bool(false)
}
trait MightBeVoluntary {
def isVoluntary(dummy: Int = 0): Bool
}
trait HasReleaseType extends HasTileLinkParameters with MightBeVoluntary {
val voluntary = Bool()
val r_type = UInt(width = tlCoh.releaseTypeWidth)
def is(t: UInt) = r_type === t
def hasData(dummy: Int = 0) = r_type.isOneOf(tlCoh.releaseTypesWithData)
def hasMultibeatData(dummy: Int = 0) = Bool(tlDataBeats > 1) &&
r_type.isOneOf(tlCoh.releaseTypesWithData)
def isVoluntary(dummy: Int = 0) = voluntary
def requiresAck(dummy: Int = 0) = !Bool(tlNetworkPreservesPointToPointOrdering)
}
trait HasGrantType extends HasTileLinkParameters with MightBeVoluntary {
val is_builtin_type = Bool()
val g_type = UInt(width = tlGrantTypeBits)
// Helper funcs
def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
def isBuiltInType(t: UInt): Bool = is_builtin_type && g_type === t
def is(t: UInt):Bool = g_type === t
def hasData(dummy: Int = 0): Bool = Mux(isBuiltInType(),
g_type.isOneOf(Grant.typesWithData),
g_type.isOneOf(tlCoh.grantTypesWithData))
def hasMultibeatData(dummy: Int = 0): Bool =
Bool(tlDataBeats > 1) && Mux(isBuiltInType(),
g_type.isOneOf(Grant.typesWithMultibeatData),
g_type.isOneOf(tlCoh.grantTypesWithData))
def isVoluntary(dummy: Int = 0): Bool = isBuiltInType() && (g_type === Grant.voluntaryAckType)
def requiresAck(dummy: Int = 0): Bool = !Bool(tlNetworkPreservesPointToPointOrdering) && !isVoluntary()
}
/** TileLink channel bundle definitions */
/** The Acquire channel is used to initiate coherence protocol transactions in
* order to gain access to a cache block's data with certain permissions
* enabled. Messages sent over this channel may be custom types defined by
 * a [[uncore.CoherencePolicy]] for cached data accesses or may be built-in types
* used for uncached data accesses. Acquires may contain data for Put or
* PutAtomic built-in types. After sending an Acquire, clients must
* wait for a manager to send them a [[uncore.Grant]] message in response.
*/
class AcquireMetadata(implicit p: Parameters) extends ClientToManagerChannel
with HasCacheBlockAddress
with HasClientTransactionId
with HasTileLinkBeatId
with HasAcquireType
with HasAcquireUnion {
/** Complete physical address for block, beat or operand */
def full_addr(dummy: Int = 0) =
Cat(this.addr_block, this.addr_beat,
Mux(isBuiltInType() && this.a_type.isOneOf(Acquire.typesWithAddrByte),
this.addr_byte(), UInt(0, tlByteAddrBits)))
}
/** [[uncore.AcquireMetadata]] with an extra field containing the data beat */
class Acquire(implicit p: Parameters) extends AcquireMetadata
with HasTileLinkData
/** [[uncore.AcquireMetadata]] with an extra field containing the entire cache block */
class BufferedAcquire(implicit p: Parameters) extends AcquireMetadata
with HasTileLinkBlock
/** [[uncore.Acquire]] with an extra field stating its source id */
class AcquireFromSrc(implicit p: Parameters) extends Acquire
with HasClientId
/** [[uncore.BufferedAcquire]] with an extra field stating its source id */
class BufferedAcquireFromSrc(implicit p: Parameters) extends BufferedAcquire
with HasClientId
/** Used to track metadata for transactions where multiple secondary misses have been merged
* and handled by a single transaction tracker.
*/
class SecondaryMissInfo(implicit p: Parameters) extends TLBundle
with HasClientTransactionId
with HasTileLinkBeatId
with HasClientId
with HasAcquireType
/** Contains definitions of the built-in Acquire types and a factory
* for [[uncore.Acquire]]
*
* In general you should avoid using this factory directly and use
* [[uncore.ClientMetadata.makeAcquire]] for custom cached Acquires and
* [[uncore.Get]], [[uncore.Put]], etc. for built-in uncached Acquires.
*
* @param is_builtin_type built-in or custom type message?
* @param a_type built-in type enum or custom type enum
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
* @param addr_beat sub-block address (which beat)
* @param data data being put outwards
* @param union additional fields used for uncached types
*/
object Acquire {
val nBuiltInTypes = 7
//TODO: Use Enum
def getType = UInt("b000") // Get a single beat of data
def getBlockType = UInt("b001") // Get a whole block of data
def putType = UInt("b010") // Put a single beat of data
def putBlockType = UInt("b011") // Put a whole block of data
def putAtomicType = UInt("b100") // Perform an atomic memory op
def getPrefetchType = UInt("b101") // Prefetch a whole block of data
def putPrefetchType = UInt("b110") // Prefetch a whole block of data, with intent to write
def typesWithData = Vec(putType, putBlockType, putAtomicType)
def typesWithMultibeatData = Vec(putBlockType)
def typesOnSubBlocks = Vec(putType, getType, putAtomicType)
def typesWithAddrByte = Vec(getType, putAtomicType)
/** Mapping between each built-in Acquire type and a built-in Grant type. */
def getBuiltInGrantType(a_type: UInt): UInt = {
MuxLookup(a_type, Grant.putAckType, Array(
Acquire.getType -> Grant.getDataBeatType,
Acquire.getBlockType -> Grant.getDataBlockType,
Acquire.putType -> Grant.putAckType,
Acquire.putBlockType -> Grant.putAckType,
Acquire.putAtomicType -> Grant.getDataBeatType,
Acquire.getPrefetchType -> Grant.prefetchAckType,
Acquire.putPrefetchType -> Grant.prefetchAckType))
}
def makeUnion(
a_type: UInt,
addr_byte: UInt,
operand_size: UInt,
opcode: UInt,
wmask: UInt,
alloc: Bool)
(implicit p: Parameters): UInt = {
val tlExternal = p(TLKey(p(TLId)))
val tlWriteMaskBits = tlExternal.writeMaskBits
val tlByteAddrBits = log2Up(tlWriteMaskBits)
val tlMemoryOperandSizeBits = log2Ceil(log2Ceil(tlWriteMaskBits) + 1)
// These had better be the right size when we cat them together!
val my_addr_byte = (UInt(0, tlByteAddrBits) | addr_byte)(tlByteAddrBits-1, 0)
val my_operand_size = (UInt(0, tlMemoryOperandSizeBits) | operand_size)(tlMemoryOperandSizeBits-1, 0)
val my_opcode = (UInt(0, M_SZ) | opcode)(M_SZ-1, 0)
val my_wmask = (UInt(0, tlWriteMaskBits) | wmask)(tlWriteMaskBits-1, 0)
MuxLookup(a_type, UInt(0), Array(
Acquire.getType -> Cat(my_addr_byte, my_operand_size, my_opcode, alloc),
Acquire.getBlockType -> Cat(my_operand_size, my_opcode, alloc),
Acquire.putType -> Cat(my_wmask, alloc),
Acquire.putBlockType -> Cat(my_wmask, alloc),
Acquire.putAtomicType -> Cat(my_addr_byte, my_operand_size, my_opcode, alloc),
Acquire.getPrefetchType -> Cat(M_XRD, alloc),
Acquire.putPrefetchType -> Cat(M_XWR, alloc)))
}
def fullWriteMask(implicit p: Parameters) = SInt(-1, width = p(TLKey(p(TLId))).writeMaskBits).asUInt
def fullOperandSize(implicit p: Parameters) = {
val dataBits = p(TLKey(p(TLId))).dataBitsPerBeat
UInt(log2Ceil(dataBits / 8))
}
// Most generic constructor
def apply(
is_builtin_type: Bool,
a_type: Bits,
client_xact_id: UInt,
addr_block: UInt,
addr_beat: UInt = UInt(0),
data: UInt = UInt(0),
union: UInt = UInt(0))
(implicit p: Parameters): Acquire = {
val acq = Wire(new Acquire)
acq.is_builtin_type := is_builtin_type
acq.a_type := a_type
acq.client_xact_id := client_xact_id
acq.addr_block := addr_block
acq.addr_beat := addr_beat
acq.data := data
acq.union := union
acq
}
// Copy constructor
def apply(a: Acquire): Acquire = {
val acq = Wire(new Acquire()(a.p))
acq := a
acq
}
}
object BuiltInAcquireBuilder {
def apply(
a_type: UInt,
client_xact_id: UInt,
addr_block: UInt,
addr_beat: UInt = UInt(0),
data: UInt = UInt(0),
addr_byte: UInt = UInt(0),
operand_size: UInt = UInt(0),
opcode: UInt = UInt(0),
wmask: UInt = UInt(0),
alloc: Bool = Bool(true))
(implicit p: Parameters): Acquire = {
Acquire(
is_builtin_type = Bool(true),
a_type = a_type,
client_xact_id = client_xact_id,
addr_block = addr_block,
addr_beat = addr_beat,
data = data,
union = Acquire.makeUnion(a_type, addr_byte, operand_size, opcode, wmask, alloc))
}
}
/** Get a single beat of data from the outer memory hierarchy
*
 * The client can hint whether the block containing this beat should be
* allocated in the intervening levels of the hierarchy.
*
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
* @param addr_beat sub-block address (which beat)
* @param addr_byte sub-block address (which byte)
* @param operand_size {byte, half, word, double} from [[uncore.MemoryOpConstants]]
* @param alloc hint whether the block should be allocated in intervening caches
*/
object Get {
def apply(
client_xact_id: UInt,
addr_block: UInt,
addr_beat: UInt,
alloc: Bool = Bool(true))
(implicit p: Parameters): Acquire = {
BuiltInAcquireBuilder(
a_type = Acquire.getType,
client_xact_id = client_xact_id,
addr_block = addr_block,
addr_beat = addr_beat,
operand_size = Acquire.fullOperandSize,
opcode = M_XRD,
alloc = alloc)
}
def apply(
client_xact_id: UInt,
addr_block: UInt,
addr_beat: UInt,
addr_byte: UInt,
operand_size: UInt,
alloc: Bool)
(implicit p: Parameters): Acquire = {
BuiltInAcquireBuilder(
a_type = Acquire.getType,
client_xact_id = client_xact_id,
addr_block = addr_block,
addr_beat = addr_beat,
addr_byte = addr_byte,
operand_size = operand_size,
opcode = M_XRD,
alloc = alloc)
}
}
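/* Example use (illustrative sketch, not part of this commit): a client
 * issuing a full-width read of beat 0 of some block, assuming an implicit
 * Parameters with a valid TLId is in scope and that `want_read` and
 * `read_block` are client-defined signals:
 *
 *   io.mem.acquire.valid := want_read
 *   io.mem.acquire.bits := Get(
 *     client_xact_id = UInt(0),
 *     addr_block = read_block,
 *     addr_beat = UInt(0))
 */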
/** Get a whole cache block of data from the outer memory hierarchy
*
* The client can hint whether the block should be allocated in the
* intervening levels of the hierarchy.
*
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
* @param alloc hint whether the block should be allocated in intervening caches
*/
object GetBlock {
def apply(
client_xact_id: UInt = UInt(0),
addr_block: UInt,
alloc: Bool = Bool(true))
(implicit p: Parameters): Acquire = {
BuiltInAcquireBuilder(
a_type = Acquire.getBlockType,
client_xact_id = client_xact_id,
addr_block = addr_block,
operand_size = Acquire.fullOperandSize,
opcode = M_XRD,
alloc = alloc)
}
}
/** Prefetch a cache block into the next-outermost level of the memory hierarchy
* with read permissions.
*
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
*/
object GetPrefetch {
def apply(
client_xact_id: UInt,
addr_block: UInt)
(implicit p: Parameters): Acquire = {
BuiltInAcquireBuilder(
a_type = Acquire.getPrefetchType,
client_xact_id = client_xact_id,
addr_block = addr_block)
}
}
/** Put a single beat of data into the outer memory hierarchy
*
* The block will be allocated in the next-outermost level of the hierarchy.
*
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
* @param addr_beat sub-block address (which beat)
 * @param data data to be written
* @param wmask per-byte write mask for this beat
* @param alloc hint whether the block should be allocated in intervening caches
*/
object Put {
def apply(
client_xact_id: UInt,
addr_block: UInt,
addr_beat: UInt,
data: UInt,
    wmask: Option[UInt] = None,
alloc: Bool = Bool(true))
(implicit p: Parameters): Acquire = {
BuiltInAcquireBuilder(
a_type = Acquire.putType,
addr_block = addr_block,
addr_beat = addr_beat,
client_xact_id = client_xact_id,
data = data,
wmask = wmask.getOrElse(Acquire.fullWriteMask),
alloc = alloc)
}
}
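/* Example use (illustrative sketch, not part of this commit): writing only
 * the low four bytes of a beat by passing a partial writemask (assumes an
 * 8-byte writemask and client-defined `wdata` and `write_block`):
 *
 *   io.mem.acquire.bits := Put(
 *     client_xact_id = UInt(0),
 *     addr_block = write_block,
 *     addr_beat = UInt(0),
 *     data = wdata,
 *     wmask = Some(UInt("h0f")))
 */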
/** Put a whole cache block of data into the outer memory hierarchy
*
* If the write mask is not full, the block will be allocated in the
* next-outermost level of the hierarchy. If the write mask is full, the
* client can hint whether the block should be allocated or not.
*
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
* @param addr_beat sub-block address (which beat of several)
 * @param data data to be written
* @param wmask per-byte write mask for this beat
* @param alloc hint whether the block should be allocated in intervening caches
*/
object PutBlock {
def apply(
client_xact_id: UInt,
addr_block: UInt,
addr_beat: UInt,
data: UInt,
wmask: Option[UInt] = None,
alloc: Bool = Bool(true))
(implicit p: Parameters): Acquire = {
BuiltInAcquireBuilder(
a_type = Acquire.putBlockType,
client_xact_id = client_xact_id,
addr_block = addr_block,
addr_beat = addr_beat,
data = data,
wmask = wmask.getOrElse(Acquire.fullWriteMask),
alloc = alloc)
}
}
/** Prefetch a cache block into the next-outermost level of the memory hierarchy
* with write permissions.
*
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
*/
object PutPrefetch {
def apply(
client_xact_id: UInt,
addr_block: UInt)
(implicit p: Parameters): Acquire = {
BuiltInAcquireBuilder(
a_type = Acquire.putPrefetchType,
client_xact_id = client_xact_id,
addr_block = addr_block)
}
}
/** Perform an atomic memory operation in the next-outermost level of the memory hierarchy
*
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
* @param addr_beat sub-block address (within which beat)
* @param addr_byte sub-block address (which byte)
* @param atomic_opcode {swap, add, xor, and, min, max, minu, maxu} from [[uncore.MemoryOpConstants]]
* @param operand_size {byte, half, word, double} from [[uncore.MemoryOpConstants]]
* @param data source operand data
*/
object PutAtomic {
def apply(
client_xact_id: UInt,
addr_block: UInt,
addr_beat: UInt,
addr_byte: UInt,
atomic_opcode: UInt,
operand_size: UInt,
data: UInt)
(implicit p: Parameters): Acquire = {
BuiltInAcquireBuilder(
a_type = Acquire.putAtomicType,
client_xact_id = client_xact_id,
addr_block = addr_block,
addr_beat = addr_beat,
data = data,
addr_byte = addr_byte,
operand_size = operand_size,
opcode = atomic_opcode)
}
}
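/* Example use (illustrative sketch, not part of this commit): a word-sized
 * atomic add to byte offset 4 of beat 0, using the opcode and operand-size
 * encodings from [[uncore.MemoryOpConstants]]; `amo_block` and `amo_operand`
 * are assumed client-defined signals:
 *
 *   io.mem.acquire.bits := PutAtomic(
 *     client_xact_id = UInt(0),
 *     addr_block = amo_block,
 *     addr_beat = UInt(0),
 *     addr_byte = UInt(4),
 *     atomic_opcode = M_XA_ADD,
 *     operand_size = MT_W,
 *     data = amo_operand)
 */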
/** The Probe channel is used to force clients to release data or cede permissions
* on a cache block. Clients respond to Probes with [[uncore.Release]] messages.
* The available types of Probes are customized by a particular
* [[uncore.CoherencePolicy]].
*/
class Probe(implicit p: Parameters) extends ManagerToClientChannel
with HasCacheBlockAddress
with HasProbeType
/** [[uncore.Probe]] with an extra field stating its destination id */
class ProbeToDst(implicit p: Parameters) extends Probe()(p) with HasClientId
/** Contains factories for [[uncore.Probe]] and [[uncore.ProbeToDst]]
*
* In general you should avoid using these factories directly and use
* [[uncore.ManagerMetadata.makeProbe(UInt,Acquire)* makeProbe]] instead.
*
* @param dst id of client to which probe should be sent
* @param p_type custom probe type
* @param addr_block address of the cache block
*/
object Probe {
def apply(p_type: UInt, addr_block: UInt)(implicit p: Parameters): Probe = {
val prb = Wire(new Probe)
prb.p_type := p_type
prb.addr_block := addr_block
prb
}
def apply(dst: UInt, p_type: UInt, addr_block: UInt)(implicit p: Parameters): ProbeToDst = {
val prb = Wire(new ProbeToDst)
prb.client_id := dst
prb.p_type := p_type
prb.addr_block := addr_block
prb
}
}
/** The Release channel is used to release data or permission back to the manager
* in response to [[uncore.Probe]] messages. It can also be used to voluntarily
* write back data, for example in the event that dirty data must be evicted on
* a cache miss. The available types of Release messages are always customized by
* a particular [[uncore.CoherencePolicy]]. Releases may contain data or may be
* simple acknowledgements. Voluntary Releases are acknowledged with [[uncore.Grant Grants]].
*/
class ReleaseMetadata(implicit p: Parameters) extends ClientToManagerChannel
with HasTileLinkBeatId
with HasCacheBlockAddress
with HasClientTransactionId
with HasReleaseType {
def full_addr(dummy: Int = 0) = Cat(this.addr_block, this.addr_beat, UInt(0, width = tlByteAddrBits))
}
/** [[uncore.ReleaseMetadata]] with an extra field containing the data beat */
class Release(implicit p: Parameters) extends ReleaseMetadata
with HasTileLinkData
/** [[uncore.ReleaseMetadata]] with an extra field containing the entire cache block */
class BufferedRelease(implicit p: Parameters) extends ReleaseMetadata
with HasTileLinkBlock
/** [[uncore.Release]] with an extra field stating its source id */
class ReleaseFromSrc(implicit p: Parameters) extends Release
with HasClientId
/** [[uncore.BufferedRelease]] with an extra field stating its source id */
class BufferedReleaseFromSrc(implicit p: Parameters) extends BufferedRelease
with HasClientId
/** Contains a [[uncore.Release]] factory
*
* In general you should avoid using this factory directly and use
* [[uncore.ClientMetadata.makeRelease]] instead.
*
* @param voluntary is this a voluntary writeback
* @param r_type type enum defined by coherence protocol
* @param client_xact_id client's transaction id
* @param addr_block address of the cache block
* @param addr_beat beat id of the data
* @param data data being written back
*/
object Release {
def apply(
voluntary: Bool,
r_type: UInt,
client_xact_id: UInt,
addr_block: UInt,
addr_beat: UInt,
data: UInt)
(implicit p: Parameters): Release = {
val rel = Wire(new Release)
rel.r_type := r_type
rel.client_xact_id := client_xact_id
rel.addr_block := addr_block
rel.addr_beat := addr_beat
rel.data := data
rel.voluntary := voluntary
rel
}
def apply(
src: UInt,
voluntary: Bool,
r_type: UInt,
client_xact_id: UInt,
addr_block: UInt,
addr_beat: UInt = UInt(0),
data: UInt = UInt(0))
(implicit p: Parameters): ReleaseFromSrc = {
val rel = Wire(new ReleaseFromSrc)
rel.client_id := src
rel.voluntary := voluntary
rel.r_type := r_type
rel.client_xact_id := client_xact_id
rel.addr_block := addr_block
rel.addr_beat := addr_beat
rel.data := data
rel
}
}
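/* Example use (illustrative sketch, not part of this commit): a voluntary
 * writeback of one dirty beat, where `rel_type` comes from the coherence
 * policy (in practice prefer [[uncore.ClientMetadata.makeRelease]]) and
 * `victim_block`, `beat_cnt` and `dirty_data` are client-defined:
 *
 *   io.release.bits := Release(
 *     voluntary = Bool(true),
 *     r_type = rel_type,
 *     client_xact_id = UInt(0),
 *     addr_block = victim_block,
 *     addr_beat = beat_cnt,
 *     data = dirty_data)
 */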
/** The Grant channel is used to refill data or grant permissions requested of the
* manager agent via an [[uncore.Acquire]] message. It is also used to acknowledge
* the receipt of voluntary writeback from clients in the form of [[uncore.Release]]
* messages. There are built-in Grant messages used for Gets and Puts, and
* coherence policies may also define custom Grant types. Grants may contain data
* or may be simple acknowledgements. Grants are responded to with [[uncore.Finish]].
*/
class GrantMetadata(implicit p: Parameters) extends ManagerToClientChannel
with HasTileLinkBeatId
with HasClientTransactionId
with HasManagerTransactionId
with HasGrantType {
def makeFinish(dummy: Int = 0): Finish = {
val f = Wire(new Finish)
f.manager_xact_id := this.manager_xact_id
f
}
}
/** [[uncore.GrantMetadata]] with an extra field containing a single beat of data */
class Grant(implicit p: Parameters) extends GrantMetadata
with HasTileLinkData
/** [[uncore.Grant]] with an extra field stating its destination */
class GrantToDst(implicit p: Parameters) extends Grant
with HasClientId
/** [[uncore.Grant]] with an extra field stating its destination */
class GrantFromSrc(implicit p: Parameters) extends Grant
with HasManagerId {
override def makeFinish(dummy: Int = 0): FinishToDst = {
val f = Wire(new FinishToDst)
f.manager_xact_id := this.manager_xact_id
f.manager_id := this.manager_id
f
}
}
/** [[uncore.GrantMetadata]] with an extra field containing an entire cache block */
class BufferedGrant(implicit p: Parameters) extends GrantMetadata
with HasTileLinkBlock
/** [[uncore.BufferedGrant]] with an extra field stating its destination */
class BufferedGrantToDst(implicit p: Parameters) extends BufferedGrant
with HasClientId
/** Contains definitions of the built-in grant types and factories
* for [[uncore.Grant]] and [[uncore.GrantToDst]]
*
* In general you should avoid using these factories directly and use
* [[uncore.ManagerMetadata.makeGrant(uncore.AcquireFromSrc* makeGrant]] instead.
*
* @param dst id of client to which grant should be sent
* @param is_builtin_type built-in or custom type message?
* @param g_type built-in type enum or custom type enum
* @param client_xact_id client's transaction id
* @param manager_xact_id manager's transaction id
* @param addr_beat beat id of the data
* @param data data being refilled to the original requestor
*/
object Grant {
val nBuiltInTypes = 5
def voluntaryAckType = UInt("b000") // For acking Releases
def prefetchAckType = UInt("b001") // For acking any kind of Prefetch
  def putAckType = UInt("b011") // For acking any kind of non-prefetch Put
def getDataBeatType = UInt("b100") // Supplying a single beat of Get
def getDataBlockType = UInt("b101") // Supplying all beats of a GetBlock
def typesWithData = Vec(getDataBlockType, getDataBeatType)
  def typesWithMultibeatData = Vec(getDataBlockType)
def apply(
is_builtin_type: Bool,
g_type: UInt,
client_xact_id: UInt,
manager_xact_id: UInt,
addr_beat: UInt,
data: UInt)
(implicit p: Parameters): Grant = {
val gnt = Wire(new Grant)
gnt.is_builtin_type := is_builtin_type
gnt.g_type := g_type
gnt.client_xact_id := client_xact_id
gnt.manager_xact_id := manager_xact_id
gnt.addr_beat := addr_beat
gnt.data := data
gnt
}
def apply(
dst: UInt,
is_builtin_type: Bool,
g_type: UInt,
client_xact_id: UInt,
manager_xact_id: UInt,
addr_beat: UInt = UInt(0),
data: UInt = UInt(0))
(implicit p: Parameters): GrantToDst = {
val gnt = Wire(new GrantToDst)
gnt.client_id := dst
gnt.is_builtin_type := is_builtin_type
gnt.g_type := g_type
gnt.client_xact_id := client_xact_id
gnt.manager_xact_id := manager_xact_id
gnt.addr_beat := addr_beat
gnt.data := data
gnt
}
}
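/* Example use (illustrative sketch, not part of this commit): a manager
 * acknowledging a built-in Put from client `src`, echoing the client
 * transaction id from a tracked Acquire `xact`; `trackerId` is assumed:
 *
 *   io.grant.bits := Grant(
 *     dst = src,
 *     is_builtin_type = Bool(true),
 *     g_type = Grant.putAckType,
 *     client_xact_id = xact.client_xact_id,
 *     manager_xact_id = UInt(trackerId))
 */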
/** The Finish channel is used to provide a global ordering of transactions
* in networks that do not guarantee point-to-point ordering of messages.
 * A Finish message is sent as acknowledgement of receipt of a [[uncore.Grant]].
* When a Finish message is received, a manager knows it is safe to begin
* processing other transactions that touch the same cache block.
*/
class Finish(implicit p: Parameters) extends ClientToManagerChannel()(p)
with HasManagerTransactionId {
def hasData(dummy: Int = 0) = Bool(false)
def hasMultibeatData(dummy: Int = 0) = Bool(false)
}
/** [[uncore.Finish]] with an extra field stating its destination */
class FinishToDst(implicit p: Parameters) extends Finish
with HasManagerId
/** Complete IO definition for incoherent TileLink, including networking headers */
class UncachedTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
val acquire = new DecoupledIO(new LogicalNetworkIO(new Acquire))
val grant = new DecoupledIO(new LogicalNetworkIO(new Grant)).flip
val finish = new DecoupledIO(new LogicalNetworkIO(new Finish))
}
/** Complete IO definition for coherent TileLink, including networking headers */
class TileLinkIO(implicit p: Parameters) extends UncachedTileLinkIO()(p) {
val probe = new DecoupledIO(new LogicalNetworkIO(new Probe)).flip
val release = new DecoupledIO(new LogicalNetworkIO(new Release))
}
/** This version of UncachedTileLinkIO does not contain network headers.
* It is intended for use within client agents.
*
* Headers are provided in the top-level that instantiates the clients and network,
* probably using a [[uncore.ClientTileLinkNetworkPort]] module.
* By eliding the header subbundles within the clients we can enable
* hierarchical P-and-R while minimizing unconnected port errors in GDS.
*
* Secondly, this version of the interface elides [[uncore.Finish]] messages, with the
* assumption that a [[uncore.FinishUnit]] has been coupled to the TileLinkIO port
* to deal with acking received [[uncore.Grant Grants]].
*/
class ClientUncachedTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
val acquire = new DecoupledIO(new Acquire)
val grant = new DecoupledIO(new Grant).flip
}
/** This version of TileLinkIO does not contain network headers.
* It is intended for use within client agents.
*/
class ClientTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
val acquire = new DecoupledIO(new Acquire)
val probe = new DecoupledIO(new Probe).flip
val release = new DecoupledIO(new Release)
val grant = new DecoupledIO(new GrantFromSrc).flip
val finish = new DecoupledIO(new FinishToDst)
}
/** This version of TileLinkIO does not contain network headers, but
* every channel does include an extra client_id subbundle.
* It is intended for use within Management agents.
*
* Managers need to track where [[uncore.Acquire]] and [[uncore.Release]] messages
* originated so that they can send a [[uncore.Grant]] to the right place.
 * Similarly they must be able to issue Probes to particular clients.
* However, we'd still prefer to have [[uncore.ManagerTileLinkNetworkPort]] fill in
* the header.src to enable hierarchical p-and-r of the managers. Additionally,
* coherent clients might be mapped to random network port ids, and we'll leave it to the
* [[uncore.ManagerTileLinkNetworkPort]] to apply the correct mapping. Managers do need to
 * see Finish messages so they know when to allow new transactions on a cache
* block to proceed.
*/
class ManagerTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
val acquire = new DecoupledIO(new AcquireFromSrc).flip
val grant = new DecoupledIO(new GrantToDst)
val finish = new DecoupledIO(new Finish).flip
val probe = new DecoupledIO(new ProbeToDst)
val release = new DecoupledIO(new ReleaseFromSrc).flip
}


@ -0,0 +1,386 @@
package uncore.tilelink
import Chisel._
import junctions._
import scala.collection.mutable.ArraySeq
import uncore.util._
import cde.{Parameters, Field}
/** PortedTileLinkNetworks combine a TileLink protocol with a particular physical
* network implementation.
*
* Specifically, they provide mappings between ClientTileLinkIO/
* ManagerTileLinkIO channels and LogicalNetwork ports (i.e. generic
* TileLinkIO with networking headers). Channels coming into the network have
* appropriate networking headers appended and outgoing channels have their
* headers stripped.
*
* @constructor base class constructor for Ported TileLink NoC
* @param addrToManagerId a mapping from a physical address to the network
* id of a coherence manager
* @param sharerToClientId a mapping from the id of a particular coherent
 * client (as determined by e.g. the directory) to the network id
* of that client
* @param clientDepths the depths of the queue that should be used to buffer
* each channel on the client side of the network
* @param managerDepths the depths of the queue that should be used to buffer
* each channel on the manager side of the network
*/
abstract class PortedTileLinkNetwork(
addrToManagerId: UInt => UInt,
sharerToClientId: UInt => UInt,
clientDepths: TileLinkDepths,
managerDepths: TileLinkDepths)
(implicit p: Parameters) extends TLModule()(p) {
val nClients = tlNClients
val nManagers = tlNManagers
val io = new Bundle {
val clients_cached = Vec(tlNCachingClients, new ClientTileLinkIO).flip
val clients_uncached = Vec(tlNCachelessClients, new ClientUncachedTileLinkIO).flip
val managers = Vec(nManagers, new ManagerTileLinkIO).flip
}
val clients = (io.clients_cached ++ io.clients_uncached).zipWithIndex.map {
case (io, idx) => {
val qs = Module(new TileLinkEnqueuer(clientDepths))
io match {
case c: ClientTileLinkIO => {
val port = Module(new ClientTileLinkNetworkPort(idx, addrToManagerId))
port.io.client <> c
qs.io.client <> port.io.network
qs.io.manager
}
case u: ClientUncachedTileLinkIO => {
val port = Module(new ClientUncachedTileLinkNetworkPort(idx, addrToManagerId))
port.io.client <> u
qs.io.client <> port.io.network
qs.io.manager
}
}
}
}
val managers = io.managers.zipWithIndex.map {
case (m, i) => {
val port = Module(new ManagerTileLinkNetworkPort(i, sharerToClientId))
val qs = Module(new TileLinkEnqueuer(managerDepths))
port.io.manager <> m
port.io.network <> qs.io.manager
qs.io.client
}
}
}
/** A simple arbiter for each channel that also deals with header-based routing.
* Assumes a single manager agent. */
class PortedTileLinkArbiter(
sharerToClientId: UInt => UInt = (u: UInt) => u,
clientDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0),
managerDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0))
(implicit p: Parameters)
extends PortedTileLinkNetwork(u => UInt(0), sharerToClientId, clientDepths, managerDepths)(p)
with TileLinkArbiterLike
with PassesId {
val arbN = nClients
require(nManagers == 1)
if(arbN > 1) {
hookupClientSource(clients.map(_.acquire), managers.head.acquire)
hookupClientSource(clients.map(_.release), managers.head.release)
hookupFinish(clients.map(_.finish), managers.head.finish)
hookupManagerSourceWithHeader(clients.map(_.probe), managers.head.probe)
hookupManagerSourceWithHeader(clients.map(_.grant), managers.head.grant)
} else {
managers.head <> clients.head
}
}
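/* Example use (illustrative sketch, not part of this commit): hooking two
 * caching clients and one uncached client up to a single manager, assuming
 * the parameters in scope set tlNCachingClients = 2, tlNCachelessClients = 1
 * and tlNManagers = 1, and that the tile/dma/l2 modules are defined:
 *
 *   val arb = Module(new PortedTileLinkArbiter())
 *   arb.io.clients_cached(0) <> tile0.io.cached
 *   arb.io.clients_cached(1) <> tile1.io.cached
 *   arb.io.clients_uncached(0) <> dma.io.mem
 *   l2.io.inner <> arb.io.managers.head
 */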
/** Provides a separate physical crossbar for each channel. Assumes multiple manager
* agents. Managers are assigned to higher physical network port ids than
* clients, and translations between logical network id and physical crossbar
* port id are done automatically.
*/
class PortedTileLinkCrossbar(
addrToManagerId: UInt => UInt = u => UInt(0),
sharerToClientId: UInt => UInt = u => u,
clientDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0),
managerDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0))
(implicit p: Parameters)
extends PortedTileLinkNetwork(addrToManagerId, sharerToClientId, clientDepths, managerDepths)(p) {
val n = p(LNEndpoints)
val phyHdrWidth = log2Up(n)
val count = tlDataBeats
// Actually instantiate the particular networks required for TileLink
val acqNet = Module(new BasicBus(CrossbarConfig(n, new Acquire, count, Some((a: PhysicalNetworkIO[Acquire]) => a.payload.hasMultibeatData()))))
val relNet = Module(new BasicBus(CrossbarConfig(n, new Release, count, Some((r: PhysicalNetworkIO[Release]) => r.payload.hasMultibeatData()))))
val prbNet = Module(new BasicBus(CrossbarConfig(n, new Probe)))
val gntNet = Module(new BasicBus(CrossbarConfig(n, new Grant, count, Some((g: PhysicalNetworkIO[Grant]) => g.payload.hasMultibeatData()))))
val ackNet = Module(new BasicBus(CrossbarConfig(n, new Finish)))
// Aliases for the various network IO bundle types
type PNIO[T <: Data] = DecoupledIO[PhysicalNetworkIO[T]]
type LNIO[T <: Data] = DecoupledIO[LogicalNetworkIO[T]]
type FromCrossbar[T <: Data] = PNIO[T] => LNIO[T]
type ToCrossbar[T <: Data] = LNIO[T] => PNIO[T]
// Shims for converting between logical network IOs and physical network IOs
def crossbarToManagerShim[T <: Data](in: PNIO[T]): LNIO[T] = {
val out = DefaultFromPhysicalShim(in)
out.bits.header.src := in.bits.header.src - UInt(nManagers)
out
}
def crossbarToClientShim[T <: Data](in: PNIO[T]): LNIO[T] = {
val out = DefaultFromPhysicalShim(in)
out.bits.header.dst := in.bits.header.dst - UInt(nManagers)
out
}
def managerToCrossbarShim[T <: Data](in: LNIO[T]): PNIO[T] = {
val out = DefaultToPhysicalShim(n, in)
out.bits.header.dst := in.bits.header.dst + UInt(nManagers, phyHdrWidth)
out
}
def clientToCrossbarShim[T <: Data](in: LNIO[T]): PNIO[T] = {
val out = DefaultToPhysicalShim(n, in)
out.bits.header.src := in.bits.header.src + UInt(nManagers, phyHdrWidth)
out
}
// Make an individual connection between virtual and physical ports using
// a particular shim. Also pin the unused Decoupled control signal low.
def doDecoupledInputHookup[T <: Data](phys_in: PNIO[T], phys_out: PNIO[T], log_io: LNIO[T], shim: ToCrossbar[T]) = {
val s = shim(log_io)
phys_in.valid := s.valid
phys_in.bits := s.bits
s.ready := phys_in.ready
phys_out.ready := Bool(false)
}
def doDecoupledOutputHookup[T <: Data](phys_in: PNIO[T], phys_out: PNIO[T], log_io: LNIO[T], shim: FromCrossbar[T]) = {
val s = shim(phys_out)
log_io.valid := s.valid
log_io.bits := s.bits
s.ready := log_io.ready
phys_in.valid := Bool(false)
}
//Hookup all instances of a particular subbundle of TileLink
def doDecoupledHookups[T <: Data](physIO: BasicCrossbarIO[T], getLogIO: TileLinkIO => LNIO[T]) = {
physIO.in.head.bits.payload match {
case c: ClientToManagerChannel => {
managers.zipWithIndex.map { case (i, id) =>
doDecoupledOutputHookup(physIO.in(id), physIO.out(id), getLogIO(i), crossbarToManagerShim[T])
}
clients.zipWithIndex.map { case (i, id) =>
doDecoupledInputHookup(physIO.in(id+nManagers), physIO.out(id+nManagers), getLogIO(i), clientToCrossbarShim[T])
}
}
case m: ManagerToClientChannel => {
managers.zipWithIndex.map { case (i, id) =>
doDecoupledInputHookup(physIO.in(id), physIO.out(id), getLogIO(i), managerToCrossbarShim[T])
}
clients.zipWithIndex.map { case (i, id) =>
doDecoupledOutputHookup(physIO.in(id+nManagers), physIO.out(id+nManagers), getLogIO(i), crossbarToClientShim[T])
}
}
}
}
doDecoupledHookups(acqNet.io, (tl: TileLinkIO) => tl.acquire)
doDecoupledHookups(relNet.io, (tl: TileLinkIO) => tl.release)
doDecoupledHookups(prbNet.io, (tl: TileLinkIO) => tl.probe)
doDecoupledHookups(gntNet.io, (tl: TileLinkIO) => tl.grant)
doDecoupledHookups(ackNet.io, (tl: TileLinkIO) => tl.finish)
}
class ClientUncachedTileLinkIORouter(
nOuter: Int, routeSel: UInt => UInt)(implicit p: Parameters)
extends TLModule {
val io = new Bundle {
val in = (new ClientUncachedTileLinkIO).flip
val out = Vec(nOuter, new ClientUncachedTileLinkIO)
}
val acq_route = routeSel(io.in.acquire.bits.full_addr())
io.in.acquire.ready := Bool(false)
io.out.zipWithIndex.foreach { case (out, i) =>
out.acquire.valid := io.in.acquire.valid && acq_route(i)
out.acquire.bits := io.in.acquire.bits
when (acq_route(i)) { io.in.acquire.ready := out.acquire.ready }
}
val gnt_arb = Module(new LockingRRArbiter(
new Grant, nOuter, tlDataBeats, Some((gnt: Grant) => gnt.hasMultibeatData())))
gnt_arb.io.in <> io.out.map(_.grant)
io.in.grant <> gnt_arb.io.out
assert(!io.in.acquire.valid || acq_route.orR, "No valid route")
}
class TileLinkInterconnectIO(val nInner: Int, val nOuter: Int)
(implicit p: Parameters) extends Bundle {
val in = Vec(nInner, new ClientUncachedTileLinkIO).flip
val out = Vec(nOuter, new ClientUncachedTileLinkIO)
}
class ClientUncachedTileLinkIOCrossbar(
nInner: Int, nOuter: Int, routeSel: UInt => UInt)
(implicit p: Parameters) extends TLModule {
val io = new TileLinkInterconnectIO(nInner, nOuter)
if (nInner == 1) {
val router = Module(new ClientUncachedTileLinkIORouter(nOuter, routeSel))
router.io.in <> io.in.head
io.out <> router.io.out
} else {
val routers = List.fill(nInner) {
Module(new ClientUncachedTileLinkIORouter(nOuter, routeSel)) }
val arbiters = List.fill(nOuter) {
Module(new ClientUncachedTileLinkIOArbiter(nInner)) }
for (i <- 0 until nInner) {
routers(i).io.in <> io.in(i)
}
for (i <- 0 until nOuter) {
arbiters(i).io.in <> routers.map(r => r.io.out(i))
io.out(i) <> arbiters(i).io.out
}
}
}
abstract class TileLinkInterconnect(implicit p: Parameters) extends TLModule()(p) {
val nInner: Int
val nOuter: Int
lazy val io = new TileLinkInterconnectIO(nInner, nOuter)
}
class TileLinkRecursiveInterconnect(val nInner: Int, addrMap: AddrMap)
(implicit p: Parameters) extends TileLinkInterconnect()(p) {
def port(name: String) = io.out(addrMap.port(name))
val nOuter = addrMap.numSlaves
val routeSel = (addr: UInt) =>
Cat(addrMap.entries.map(e => addrMap(e.name).containsAddress(addr)).reverse)
val xbar = Module(new ClientUncachedTileLinkIOCrossbar(nInner, addrMap.length, routeSel))
xbar.io.in <> io.in
io.out <> addrMap.entries.zip(xbar.io.out).flatMap {
case (entry, xbarOut) => {
entry.region match {
case submap: AddrMap if submap.isEmpty =>
xbarOut.acquire.ready := Bool(false)
xbarOut.grant.valid := Bool(false)
None
case submap: AddrMap if !submap.collapse =>
val ic = Module(new TileLinkRecursiveInterconnect(1, submap))
ic.io.in.head <> xbarOut
ic.io.out
case _ =>
Some(xbarOut)
}
}
}
}
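/* Example use (illustrative sketch, not part of this commit): given an
 * `addrMap: AddrMap` from junctions with entries named "mem" and "io",
 * route a single client and pick off the named slave ports:
 *
 *   val ic = Module(new TileLinkRecursiveInterconnect(1, addrMap))
 *   ic.io.in.head <> client.io.mem
 *   uart.io.tl <> ic.port("io")
 *   mem.io.tl <> ic.port("mem")
 */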
class TileLinkMemoryInterconnect(
nBanksPerChannel: Int, nChannels: Int)
(implicit p: Parameters) extends TileLinkInterconnect()(p) {
val nBanks = nBanksPerChannel * nChannels
val nInner = nBanks
val nOuter = nChannels
def connectChannel(outer: ClientUncachedTileLinkIO, inner: ClientUncachedTileLinkIO) {
outer <> inner
outer.acquire.bits.addr_block := inner.acquire.bits.addr_block >> UInt(log2Ceil(nChannels))
}
for (i <- 0 until nChannels) {
/* Bank assignments to channels are strided so that consecutive banks
* map to different channels. That way, consecutive cache lines also
* map to different channels */
val banks = (i until nBanks by nChannels).map(j => io.in(j))
val channelArb = Module(new ClientUncachedTileLinkIOArbiter(nBanksPerChannel))
channelArb.io.in <> banks
connectChannel(io.out(i), channelArb.io.out)
}
}
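/* Worked example (illustrative, not part of this commit): with nChannels = 2
 * and nBanksPerChannel = 2, channel 0 arbitrates banks {0, 2} and channel 1
 * arbitrates banks {1, 3}, so consecutive cache blocks alternate between
 * channels; connectChannel then shifts addr_block right by log2Ceil(2) = 1
 * bit so each channel sees a dense, halved block address space.
 */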
/** Allows users to switch between various memory configurations. Note that
* this is a dangerous operation: not only does switching the select input to
* this module violate TileLink, it also causes the memory of the machine to
* become garbled. It's expected that select only changes at boot time, as
* part of the memory controller configuration. */
class TileLinkMemorySelectorIO(val nBanks: Int, val maxMemChannels: Int, nConfigs: Int)
(implicit p: Parameters)
extends TileLinkInterconnectIO(nBanks, maxMemChannels) {
val select = UInt(INPUT, width = log2Up(nConfigs))
override def cloneType =
new TileLinkMemorySelectorIO(nBanks, maxMemChannels, nConfigs).asInstanceOf[this.type]
}
class TileLinkMemorySelector(nBanks: Int, maxMemChannels: Int, configs: Seq[Int])
(implicit p: Parameters)
extends TileLinkInterconnect()(p) {
val nInner = nBanks
val nOuter = maxMemChannels
val nConfigs = configs.size
override lazy val io = new TileLinkMemorySelectorIO(nBanks, maxMemChannels, nConfigs)
def muxOnSelect[T <: Data](up: DecoupledIO[T], dn: DecoupledIO[T], active: Bool): Unit = {
when (active) { dn.bits := up.bits }
when (active) { up.ready := dn.ready }
when (active) { dn.valid := up.valid }
}
def muxOnSelect(up: ClientUncachedTileLinkIO, dn: ClientUncachedTileLinkIO, active: Bool): Unit = {
muxOnSelect(up.acquire, dn.acquire, active)
muxOnSelect(dn.grant, up.grant, active)
}
def muxOnSelect(up: Vec[ClientUncachedTileLinkIO], dn: Vec[ClientUncachedTileLinkIO], active: Bool) : Unit = {
for (i <- 0 until up.size)
muxOnSelect(up(i), dn(i), active)
}
  /* Disconnects a vector of TileLink ports by setting them to invalid.
   * Chisel does not allow unconnected inputs, so we must also tie the
   * bits fields to 0. */
def disconnectOuter(outer: Vec[ClientUncachedTileLinkIO]) = {
outer.foreach{ m =>
m.acquire.valid := Bool(false)
m.acquire.bits := m.acquire.bits.fromBits(UInt(0))
m.grant.ready := Bool(false)
}
}
def disconnectInner(inner: Vec[ClientUncachedTileLinkIO]) = {
inner.foreach { m =>
m.grant.valid := Bool(false)
m.grant.bits := m.grant.bits.fromBits(UInt(0))
m.acquire.ready := Bool(false)
}
}
/* Provides default wires on all our outputs. */
disconnectOuter(io.out)
disconnectInner(io.in)
/* Constructs interconnects for each of the layouts suggested by the
* configuration and switches between them based on the select input. */
configs.zipWithIndex.foreach{ case (nChannels, select) =>
val nBanksPerChannel = nBanks / nChannels
val ic = Module(new TileLinkMemoryInterconnect(nBanksPerChannel, nChannels))
disconnectInner(ic.io.out)
disconnectOuter(ic.io.in)
muxOnSelect(io.in, ic.io.in, io.select === UInt(select))
muxOnSelect(ic.io.out, io.out, io.select === UInt(select))
}
}
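/* Example use (illustrative sketch, not part of this commit): four banks
 * that can be presented as either one or two memory channels, selected at
 * boot; `bankIOs`, `channelIOs` and `configReg` are assumed to be defined:
 *
 *   val sel = Module(new TileLinkMemorySelector(4, 2, Seq(1, 2)))
 *   sel.io.in <> bankIOs
 *   sel.io.select := configReg
 *   for ((chan, out) <- channelIOs.zip(sel.io.out)) { chan <> out }
 */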


@ -0,0 +1,308 @@
// See LICENSE for license details.
package uncore.tilelink
import Chisel._
import uncore.util._
import cde.{Parameters, Field}
case object LNEndpoints extends Field[Int]
case object LNHeaderBits extends Field[Int]
class PhysicalHeader(n: Int) extends Bundle {
val src = UInt(width = log2Up(n))
val dst = UInt(width = log2Up(n))
}
class PhysicalNetworkIO[T <: Data](n: Int, dType: T) extends Bundle {
val header = new PhysicalHeader(n)
val payload = dType.cloneType
override def cloneType = new PhysicalNetworkIO(n,dType).asInstanceOf[this.type]
}
class BasicCrossbarIO[T <: Data](n: Int, dType: T) extends Bundle {
val in = Vec(n, Decoupled(new PhysicalNetworkIO(n,dType))).flip
val out = Vec(n, Decoupled(new PhysicalNetworkIO(n,dType)))
}
abstract class PhysicalNetwork extends Module
case class CrossbarConfig[T <: Data](n: Int, dType: T, count: Int = 1, needsLock: Option[PhysicalNetworkIO[T] => Bool] = None)
abstract class AbstractCrossbar[T <: Data](conf: CrossbarConfig[T]) extends PhysicalNetwork {
val io = new BasicCrossbarIO(conf.n, conf.dType)
}
class BasicBus[T <: Data](conf: CrossbarConfig[T]) extends AbstractCrossbar(conf) {
val arb = Module(new LockingRRArbiter(io.in(0).bits, conf.n, conf.count, conf.needsLock))
arb.io.in <> io.in
arb.io.out.ready := io.out(arb.io.out.bits.header.dst).ready
for ((out, i) <- io.out zipWithIndex) {
out.valid := arb.io.out.valid && arb.io.out.bits.header.dst === UInt(i)
out.bits := arb.io.out.bits
}
}
class BasicCrossbar[T <: Data](conf: CrossbarConfig[T]) extends AbstractCrossbar(conf) {
io.in.foreach { _.ready := Bool(false) }
io.out.zipWithIndex.map{ case (out, i) => {
val rrarb = Module(new LockingRRArbiter(io.in(0).bits, conf.n, conf.count, conf.needsLock))
(rrarb.io.in, io.in).zipped.map{ case (arb, in) => {
val destined = in.bits.header.dst === UInt(i)
arb.valid := in.valid && destined
arb.bits := in.bits
when (arb.ready && destined) { in.ready := Bool(true) }
}}
out <> rrarb.io.out
}}
}
abstract class LogicalNetwork extends Module
class LogicalHeader(implicit p: Parameters) extends junctions.ParameterizedBundle()(p) {
val src = UInt(width = p(LNHeaderBits))
val dst = UInt(width = p(LNHeaderBits))
}
class LogicalNetworkIO[T <: Data](dType: T)(implicit p: Parameters) extends Bundle {
val header = new LogicalHeader
val payload = dType.cloneType
override def cloneType = new LogicalNetworkIO(dType)(p).asInstanceOf[this.type]
}
object DecoupledLogicalNetworkIOWrapper {
def apply[T <: Data](
in: DecoupledIO[T],
src: UInt = UInt(0),
dst: UInt = UInt(0))
(implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
val out = Wire(Decoupled(new LogicalNetworkIO(in.bits)))
out.valid := in.valid
out.bits.payload := in.bits
out.bits.header.dst := dst
out.bits.header.src := src
in.ready := out.ready
out
}
}
object DecoupledLogicalNetworkIOUnwrapper {
def apply[T <: Data](in: DecoupledIO[LogicalNetworkIO[T]])
(implicit p: Parameters): DecoupledIO[T] = {
val out = Wire(Decoupled(in.bits.payload))
out.valid := in.valid
out.bits := in.bits.payload
in.ready := out.ready
out
}
}
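/* Example use (illustrative sketch, not part of this commit): attaching a
 * header to an outbound headerless Grant stream and stripping the header
 * from an inbound one; `mgr`, `client`, `out_net`, `in_net`, `managerId`
 * and `grant_dst` are assumed to be defined by the surrounding module:
 *
 *   out_net.grant <> DecoupledLogicalNetworkIOWrapper(
 *     mgr.io.grant, src = UInt(managerId), dst = grant_dst)
 *   client.io.grant <> DecoupledLogicalNetworkIOUnwrapper(in_net.grant)
 */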
object DefaultFromPhysicalShim {
def apply[T <: Data](in: DecoupledIO[PhysicalNetworkIO[T]])
(implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
val out = Wire(Decoupled(new LogicalNetworkIO(in.bits.payload)))
out.bits.header := in.bits.header
out.bits.payload := in.bits.payload
out.valid := in.valid
in.ready := out.ready
out
}
}
object DefaultToPhysicalShim {
def apply[T <: Data](n: Int, in: DecoupledIO[LogicalNetworkIO[T]])
(implicit p: Parameters): DecoupledIO[PhysicalNetworkIO[T]] = {
val out = Wire(Decoupled(new PhysicalNetworkIO(n, in.bits.payload)))
out.bits.header := in.bits.header
out.bits.payload := in.bits.payload
out.valid := in.valid
in.ready := out.ready
out
}
}
/** A helper module that automatically issues [[uncore.Finish]] messages in response
 * to the [[uncore.Grant]] messages that it receives from a manager and forwards to a client
*/
class FinishUnit(srcId: Int = 0, outstanding: Int = 2)(implicit p: Parameters) extends TLModule()(p)
with HasDataBeatCounters {
val io = new Bundle {
val grant = Decoupled(new LogicalNetworkIO(new Grant)).flip
val refill = Decoupled(new Grant)
val finish = Decoupled(new LogicalNetworkIO(new Finish))
val ready = Bool(OUTPUT)
}
val g = io.grant.bits.payload
if(tlNetworkPreservesPointToPointOrdering) {
io.finish.valid := Bool(false)
io.refill.valid := io.grant.valid
io.refill.bits := g
io.grant.ready := io.refill.ready
io.ready := Bool(true)
} else {
// We only want to send Finishes after we have collected all beats of
// a multibeat Grant. But Grants from multiple managers or transactions may
// get interleaved, so we could need a counter for each.
val done = if(tlNetworkDoesNotInterleaveBeats) {
connectIncomingDataBeatCounterWithHeader(io.grant)
} else {
val entries = 1 << tlClientXactIdBits
def getId(g: LogicalNetworkIO[Grant]) = g.payload.client_xact_id
assert(getId(io.grant.bits) <= UInt(entries), "Not enough grant beat counters, only " + entries + " entries.")
connectIncomingDataBeatCountersWithHeader(io.grant, entries, getId).reduce(_||_)
}
val q = Module(new FinishQueue(outstanding))
q.io.enq.valid := io.grant.fire() && g.requiresAck() && (!g.hasMultibeatData() || done)
q.io.enq.bits := g.makeFinish()
q.io.enq.bits.manager_id := io.grant.bits.header.src
io.finish.bits.header.src := UInt(srcId)
io.finish.bits.header.dst := q.io.deq.bits.manager_id
io.finish.bits.payload := q.io.deq.bits
io.finish.valid := q.io.deq.valid
q.io.deq.ready := io.finish.ready
io.refill.valid := (q.io.enq.ready || !g.requiresAck()) && io.grant.valid
io.refill.bits := g
io.grant.ready := (q.io.enq.ready || !g.requiresAck()) && io.refill.ready
io.ready := q.io.enq.ready
}
}
class FinishQueue(entries: Int)(implicit p: Parameters) extends Queue(new FinishToDst()(p), entries)
/** A port to convert [[uncore.ClientTileLinkIO]].flip into [[uncore.TileLinkIO]]
*
* Creates network headers for [[uncore.Acquire]] and [[uncore.Release]] messages,
* calculating header.dst and filling in header.src.
* Strips headers from [[uncore.Probe Probes]].
 * Passes [[uncore.GrantFromSrc]] and accepts [[uncore.FinishToDst]] in response,
* setting up the headers for each.
*
* @param clientId network port id of this agent
* @param addrConvert how a physical address maps to a destination manager port id
*/
class ClientTileLinkNetworkPort(clientId: Int, addrConvert: UInt => UInt)
(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val client = new ClientTileLinkIO().flip
val network = new TileLinkIO
}
val acq_with_header = ClientTileLinkHeaderCreator(io.client.acquire, clientId, addrConvert)
val rel_with_header = ClientTileLinkHeaderCreator(io.client.release, clientId, addrConvert)
val fin_with_header = ClientTileLinkHeaderCreator(io.client.finish, clientId)
val prb_without_header = DecoupledLogicalNetworkIOUnwrapper(io.network.probe)
val gnt_without_header = DecoupledLogicalNetworkIOUnwrapper(io.network.grant)
io.network.acquire <> acq_with_header
io.network.release <> rel_with_header
io.network.finish <> fin_with_header
io.client.probe <> prb_without_header
io.client.grant.bits.manager_id := io.network.grant.bits.header.src
io.client.grant <> gnt_without_header
}
/** A port to convert [[uncore.ClientUncachedTileLinkIO]].flip into [[uncore.TileLinkIO]]
*
* Creates network headers for [[uncore.Acquire]] and [[uncore.Release]] messages,
* calculating header.dst and filling in header.src.
* Responds to [[uncore.Grant]] by automatically issuing [[uncore.Finish]] to the granting managers.
*
* @param clientId network port id of this agent
* @param addrConvert how a physical address maps to a destination manager port id
*/
class ClientUncachedTileLinkNetworkPort(clientId: Int, addrConvert: UInt => UInt)
(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val client = new ClientUncachedTileLinkIO().flip
val network = new TileLinkIO
}
val finisher = Module(new FinishUnit(clientId))
finisher.io.grant <> io.network.grant
io.network.finish <> finisher.io.finish
val acq_with_header = ClientTileLinkHeaderCreator(io.client.acquire, clientId, addrConvert)
val gnt_without_header = finisher.io.refill
io.network.acquire.bits := acq_with_header.bits
io.network.acquire.valid := acq_with_header.valid && finisher.io.ready
acq_with_header.ready := io.network.acquire.ready && finisher.io.ready
io.client.grant <> gnt_without_header
io.network.probe.ready := Bool(false)
io.network.release.valid := Bool(false)
}
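/* Example use (illustrative sketch, not part of this commit): placing an
 * uncached client at network port id 2 in a single-manager system, so all
 * addresses route to manager 0; `dma` and the network-side TileLinkIO
 * `netPort` are assumed to be defined:
 *
 *   val port = Module(new ClientUncachedTileLinkNetworkPort(2,
 *     addrConvert = (addr: UInt) => UInt(0)))
 *   port.io.client <> dma.io.mem
 *   netPort <> port.io.network
 */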
object ClientTileLinkHeaderCreator {
def apply[T <: ClientToManagerChannel with HasManagerId](
in: DecoupledIO[T],
clientId: Int)
(implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
val out = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
out.bits.payload := in.bits
out.bits.header.src := UInt(clientId)
out.bits.header.dst := in.bits.manager_id
out.valid := in.valid
in.ready := out.ready
out
}
def apply[T <: ClientToManagerChannel with HasCacheBlockAddress](
in: DecoupledIO[T],
clientId: Int,
addrConvert: UInt => UInt)
(implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
val out = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
out.bits.payload := in.bits
out.bits.header.src := UInt(clientId)
out.bits.header.dst := addrConvert(in.bits.addr_block)
out.valid := in.valid
in.ready := out.ready
out
}
}
/** A port to convert [[uncore.ManagerTileLinkIO]].flip into [[uncore.TileLinkIO]].flip
*
 * Creates network headers for [[uncore.Probe]] and [[uncore.Grant]] messages,
* calculating header.dst and filling in header.src.
* Strips headers from [[uncore.Acquire]], [[uncore.Release]] and [[uncore.Finish]],
* but supplies client_id instead.
*
* @param managerId the network port id of this agent
* @param idConvert how a sharer id maps to a destination client port id
*/
class ManagerTileLinkNetworkPort(managerId: Int, idConvert: UInt => UInt)
(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val manager = new ManagerTileLinkIO().flip
val network = new TileLinkIO().flip
}
io.network.grant <> ManagerTileLinkHeaderCreator(io.manager.grant, managerId, (u: UInt) => u)
io.network.probe <> ManagerTileLinkHeaderCreator(io.manager.probe, managerId, idConvert)
io.manager.acquire <> DecoupledLogicalNetworkIOUnwrapper(io.network.acquire)
io.manager.acquire.bits.client_id := io.network.acquire.bits.header.src
io.manager.release <> DecoupledLogicalNetworkIOUnwrapper(io.network.release)
io.manager.release.bits.client_id := io.network.release.bits.header.src
io.manager.finish <> DecoupledLogicalNetworkIOUnwrapper(io.network.finish)
}
object ManagerTileLinkHeaderCreator {
def apply[T <: ManagerToClientChannel with HasClientId](
in: DecoupledIO[T],
managerId: Int,
idConvert: UInt => UInt)
(implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
val out = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
out.bits.payload := in.bits
out.bits.header.src := UInt(managerId)
out.bits.header.dst := idConvert(in.bits.client_id)
out.valid := in.valid
in.ready := out.ready
out
}
}


@ -0,0 +1,422 @@
package uncore.unittests
import Chisel._
import junctions._
import uncore.tilelink._
import uncore.constants._
import uncore.util._
import cde.Parameters
abstract class Driver(implicit p: Parameters) extends TLModule()(p) {
val io = new Bundle {
val mem = new ClientUncachedTileLinkIO
val start = Bool(INPUT)
val finished = Bool(OUTPUT)
}
}
/**
* Tests that single-beat Gets of decreasing size return subsets of the
* data returned by larger Gets
*/
class GetMultiWidthDriver(implicit p: Parameters) extends Driver()(p) {
val s_start :: s_send :: s_recv :: s_done :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_start)
val w = 64
val initialSize = UInt(log2Ceil(w/8))
val size = Reg(UInt(width = log2Ceil(log2Ceil(w/8)+1)))
val ref = Reg(UInt(width = w))
val bytemask = (UInt(1) << (UInt(1) << size)) - UInt(1)
val bitmask = FillInterleaved(8, bytemask)
io.mem.acquire.valid := (state === s_send)
io.mem.acquire.bits := Get(
client_xact_id = UInt(0),
addr_block = UInt(0),
addr_beat = UInt(0),
addr_byte = UInt(0),
operand_size = size,
alloc = Bool(false))
io.mem.grant.ready := (state === s_recv)
when (state === s_start && io.start) {
size := initialSize
state := s_send
}
when (io.mem.acquire.fire()) { state := s_recv }
when (io.mem.grant.fire()) {
when (size === initialSize) { ref := io.mem.grant.bits.data }
size := size - UInt(1)
state := Mux(size === UInt(0), s_done, s_send)
}
io.finished := state === s_done
assert(!io.mem.grant.valid || size === initialSize ||
(io.mem.grant.bits.data & bitmask) === (ref & bitmask),
"GetMultiWidth: smaller get does not match larger get")
}
/**
* Tests that single-beat Gets across a range of memory return
* the expected data.
* @param expected The values of the data expected to be read.
* Each element is the data for one beat.
*/
class GetSweepDriver(expected: Seq[BigInt])
(implicit p: Parameters) extends Driver()(p) {
val s_start :: s_send :: s_recv :: s_done :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_start)
val nReqs = expected.size
val (req_cnt, req_done) = Counter(io.mem.grant.fire(), nReqs)
when (state === s_start && io.start) { state := s_send }
when (io.mem.acquire.fire()) { state := s_recv }
when (io.mem.grant.fire()) { state := s_send }
when (req_done) { state := s_done }
val (addr_block, addr_beat) = if (nReqs > tlDataBeats) {
(req_cnt(log2Up(nReqs) - 1, tlBeatAddrBits),
req_cnt(tlBeatAddrBits - 1, 0))
} else {
(UInt(0), req_cnt)
}
val exp_data = Vec(expected.map(e => UInt(e, tlDataBits)))
io.mem.acquire.valid := (state === s_send)
io.mem.acquire.bits := Get(
client_xact_id = UInt(0),
addr_block = addr_block,
addr_beat = addr_beat)
io.mem.grant.ready := (state === s_recv)
io.finished := state === s_done
assert(!io.mem.grant.valid || io.mem.grant.bits.data === exp_data(req_cnt),
"GetSweep: data does not match expected")
}
/**
* Tests that multi-beat GetBlocks across a range of memory return
* the expected data.
* @param expected The values of the data expected to be read.
* Each element is the data for one beat.
*/
class GetBlockSweepDriver(expected: Seq[BigInt])
(implicit p: Parameters) extends Driver()(p) {
val s_start :: s_send :: s_recv :: s_done :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_start)
val nReqs = ((expected.size - 1) / tlDataBeats + 1) * tlDataBeats
val (req_cnt, req_done) = Counter(io.mem.grant.fire(), nReqs)
val (addr_beat, beats_done) = Counter(io.mem.grant.fire(), tlDataBeats)
val tlBlockOffset = tlByteAddrBits + tlBeatAddrBits
val addr_block =
if (nReqs > tlDataBeats) req_cnt(log2Up(nReqs) - 1, tlBlockOffset)
else UInt(0)
io.mem.acquire.valid := (state === s_send)
io.mem.acquire.bits := GetBlock(
client_xact_id = UInt(0),
addr_block = addr_block)
io.mem.grant.ready := (state === s_recv)
io.finished := state === s_done
when (state === s_start && io.start) { state := s_send }
when (io.mem.acquire.fire()) { state := s_recv }
when (beats_done) { state := s_send }
when (req_done) { state := s_done }
val exp_data = Vec(expected.map(e => UInt(e, tlDataBits)))
assert(!io.mem.grant.valid || req_cnt >= UInt(expected.size) ||
io.mem.grant.bits.data === exp_data(req_cnt),
"GetBlockSweep: data does not match expected")
}
/**
* Tests that single-beat Puts across a range of memory persists correctly.
* @param n the number of beats to put
*/
class PutSweepDriver(val n: Int)(implicit p: Parameters) extends Driver()(p) {
val (s_idle :: s_put_req :: s_put_resp ::
s_get_req :: s_get_resp :: s_done :: Nil) = Enum(Bits(), 6)
val state = Reg(init = s_idle)
val (put_cnt, put_done) = Counter(state === s_put_resp && io.mem.grant.valid, n)
val (get_cnt, get_done) = Counter(state === s_get_resp && io.mem.grant.valid, n)
val (put_block, put_beat) = if (n > tlDataBeats) {
(put_cnt(log2Up(n) - 1, tlBeatAddrBits),
put_cnt(tlBeatAddrBits - 1, 0))
} else {
(UInt(0), put_cnt)
}
val (get_block, get_beat) = if (n > tlDataBeats) {
(get_cnt(log2Up(n) - 1, tlBeatAddrBits),
get_cnt(tlBeatAddrBits - 1, 0))
} else {
(UInt(0), get_cnt)
}
val dataRep = (tlDataBits - 1) / log2Up(n) + 1
val put_data = Fill(dataRep, put_cnt)(tlDataBits - 1, 0)
val get_data = Fill(dataRep, get_cnt)(tlDataBits - 1, 0)
io.mem.acquire.valid := state.isOneOf(s_put_req, s_get_req)
io.mem.acquire.bits := Mux(state === s_put_req,
Put(
client_xact_id = UInt(0),
addr_block = put_block,
addr_beat = put_beat,
data = put_data),
Get(
client_xact_id = UInt(0),
addr_block = get_block,
addr_beat = get_beat))
io.mem.grant.ready := state.isOneOf(s_put_resp, s_get_resp)
when (state === s_idle && io.start) { state := s_put_req }
when (state === s_put_req && io.mem.acquire.ready) { state := s_put_resp }
when (state === s_put_resp && io.mem.grant.valid) {
state := Mux(put_done, s_get_req, s_put_req)
}
when (state === s_get_req && io.mem.acquire.ready) { state := s_get_resp }
when (state === s_get_resp && io.mem.grant.valid) {
state := Mux(get_done, s_done, s_get_req)
}
io.finished := (state === s_done)
assert(!io.mem.grant.valid || !io.mem.grant.bits.hasData() ||
io.mem.grant.bits.data === get_data,
"PutSweepDriver: data does not match")
}
/**
* Tests that write-masked single-beat puts work correctly by putting
* data with steadily smaller write-masks to the same beat.
* @param minBytes the smallest number of bytes that can be in the writemask
*/
class PutMaskDriver(minBytes: Int = 1)(implicit p: Parameters) extends Driver()(p) {
val (s_idle :: s_put_req :: s_put_resp ::
s_get_req :: s_get_resp :: s_done :: Nil) = Enum(Bits(), 6)
val state = Reg(init = s_idle)
val nbytes = Reg(UInt(width = log2Up(tlWriteMaskBits) + 1))
val wmask = (UInt(1) << nbytes) - UInt(1)
val wdata = Fill(tlDataBits / 8, Wire(UInt(width = 8), init = nbytes))
  // Write sizes step from the full TL data width in bytes down to minBytes,
  // halving each time
val expected = (log2Ceil(tlDataBits / 8) to log2Ceil(minBytes) by -1)
.map(1 << _).foldLeft(UInt(0, tlDataBits)) {
// Change the lower nbytes of the value
(value, nbytes) => {
val mask = UInt((BigInt(1) << (nbytes * 8)) - BigInt(1), tlDataBits)
val wval = Fill(tlDataBits / 8, UInt(nbytes, 8))
(value & ~mask) | (wval & mask)
}
}
when (state === s_idle && io.start) {
state := s_put_req
nbytes := UInt(8)
}
when (state === s_put_req && io.mem.acquire.ready) {
state := s_put_resp
}
when (state === s_put_resp && io.mem.grant.valid) {
nbytes := nbytes >> UInt(1)
state := Mux(nbytes === UInt(minBytes), s_get_req, s_put_req)
}
when (state === s_get_req && io.mem.acquire.ready) {
state := s_get_resp
}
when (state === s_get_resp && io.mem.grant.valid) {
state := s_done
}
io.finished := (state === s_done)
io.mem.acquire.valid := state.isOneOf(s_put_req, s_get_req)
io.mem.acquire.bits := Mux(state === s_put_req,
Put(
client_xact_id = UInt(0),
addr_block = UInt(0),
addr_beat = UInt(0),
data = wdata,
wmask = Some(wmask)),
Get(
client_xact_id = UInt(0),
addr_block = UInt(0),
addr_beat = UInt(0)))
io.mem.grant.ready := state.isOneOf(s_put_resp, s_get_resp)
assert(!io.mem.grant.valid || state =/= s_get_resp ||
io.mem.grant.bits.data === expected,
"PutMask: data does not match expected")
}
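// Worked example (a sketch, assuming tlDataBits = 64 and minBytes = 1):
// nbytes walks 8 -> 4 -> 2 -> 1, so the write masks used are 0xff, 0x0f,
// 0x03, 0x01, and the final Get should observe 0x0808080804040201.
/**
 * Tests that multi-beat PutBlocks across a range of memory persist correctly
 * by writing n blocks and then reading them all back.
 */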
class PutBlockSweepDriver(val n: Int)(implicit p: Parameters)
extends Driver()(p) {
val (s_idle :: s_put_req :: s_put_resp ::
s_get_req :: s_get_resp :: s_done :: Nil) = Enum(Bits(), 6)
val state = Reg(init = s_idle)
val (put_beat, put_beat_done) = Counter(
state === s_put_req && io.mem.acquire.ready, tlDataBeats)
val (put_cnt, put_done) = Counter(
state === s_put_resp && io.mem.grant.valid, n)
val (get_beat, get_beat_done) = Counter(
state === s_get_resp && io.mem.grant.valid, tlDataBeats)
val (get_cnt, get_done) = Counter(get_beat_done, n)
val dataRep = (tlDataBits - 1) / (log2Up(n) + tlBeatAddrBits) + 1
val put_data = Fill(dataRep, Cat(put_cnt, put_beat))(tlDataBits - 1, 0)
val get_data = Fill(dataRep, Cat(get_cnt, get_beat))(tlDataBits - 1, 0)
when (state === s_idle && io.start) { state := s_put_req }
when (put_beat_done) { state := s_put_resp }
when (state === s_put_resp && io.mem.grant.valid) {
state := Mux(put_done, s_get_req, s_put_req)
}
when (state === s_get_req && io.mem.acquire.ready) { state := s_get_resp }
when (get_beat_done) { state := Mux(get_done, s_done, s_get_req) }
val put_acquire = PutBlock(
client_xact_id = UInt(0),
addr_block = put_cnt,
addr_beat = put_beat,
data = put_data)
val get_acquire = GetBlock(
client_xact_id = UInt(0),
addr_block = get_cnt)
io.finished := (state === s_done)
io.mem.acquire.valid := state.isOneOf(s_put_req, s_get_req)
io.mem.acquire.bits := Mux(state === s_put_req, put_acquire, get_acquire)
io.mem.grant.ready := state.isOneOf(s_put_resp, s_get_resp)
assert(!io.mem.grant.valid || state =/= s_get_resp ||
io.mem.grant.bits.data === get_data,
"PutBlockSweep: data does not match expected")
}
class PutAtomicDriver(implicit p: Parameters) extends Driver()(p) {
val s_idle :: s_put :: s_atomic :: s_get :: s_done :: Nil = Enum(Bits(), 5)
val state = Reg(init = s_idle)
val sending = Reg(init = Bool(false))
val put_acquire = Put(
client_xact_id = UInt(0),
addr_block = UInt(0),
addr_beat = UInt(0),
// Put 15 in bytes 7:4
data = UInt(15L << 32),
wmask = Some(UInt(0xf0)))
val amo_acquire = PutAtomic(
client_xact_id = UInt(0),
addr_block = UInt(0),
addr_beat = UInt(0),
addr_byte = UInt(4),
atomic_opcode = M_XA_ADD,
operand_size = UInt(log2Ceil(32 / 8)),
data = UInt(3L << 32))
val get_acquire = Get(
client_xact_id = UInt(0),
addr_block = UInt(0),
addr_beat = UInt(0))
io.finished := (state === s_done)
io.mem.acquire.valid := sending
io.mem.acquire.bits := MuxLookup(state, get_acquire, Seq(
s_put -> put_acquire,
s_atomic -> amo_acquire,
s_get -> get_acquire))
io.mem.grant.ready := !sending
when (io.mem.acquire.fire()) { sending := Bool(false) }
when (state === s_idle && io.start) {
state := s_put
sending := Bool(true)
}
when (io.mem.grant.fire()) {
when (state === s_put) { sending := Bool(true); state := s_atomic }
when (state === s_atomic) { sending := Bool(true); state := s_get }
when (state === s_get) { state := s_done }
}
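  // The Put wrote 15 into bytes 7:4 and the PutAtomic added 3, so the Get
  // should observe 18 in bits 63:32.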
  assert(!io.mem.grant.valid || state =/= s_get ||
    io.mem.grant.bits.data(63, 32) === UInt(18),
    "PutAtomic: data does not match expected")
}
class PrefetchDriver(implicit p: Parameters) extends Driver()(p) {
val s_idle :: s_put_pf :: s_get_pf :: s_done :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_idle)
val sending = Reg(init = Bool(false))
when (state === s_idle) {
sending := Bool(true)
state := s_put_pf
}
when (io.mem.acquire.fire()) { sending := Bool(false) }
when (io.mem.grant.fire()) {
when (state === s_put_pf) { sending := Bool(true); state := s_get_pf }
when (state === s_get_pf) { state := s_done }
}
io.finished := (state === s_done)
io.mem.acquire.valid := sending
io.mem.acquire.bits := Mux(state === s_put_pf,
PutPrefetch(
client_xact_id = UInt(0),
addr_block = UInt(0)),
GetPrefetch(
client_xact_id = UInt(0),
addr_block = UInt(0)))
io.mem.grant.ready := !sending
}
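/**
 * Runs a sequence of drivers one after another, muxing the shared memory
 * port to whichever driver is currently active and finishing once the
 * last driver finishes.
 */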
class DriverSet(driverGen: Parameters => Seq[Driver])(implicit p: Parameters)
extends Driver()(p) {
val s_start :: s_run :: s_done :: Nil = Enum(Bits(), 3)
val state = Reg(init = s_start)
val drivers = driverGen(p)
val idx = Reg(init = UInt(0, log2Up(drivers.size)))
val finished = Wire(init = Bool(false))
when (state === s_start && io.start) { state := s_run }
when (state === s_run && finished) {
when (idx === UInt(drivers.size - 1)) { state := s_done }
idx := idx + UInt(1)
}
io.finished := state === s_done
io.mem.acquire.valid := Bool(false)
io.mem.grant.ready := Bool(false)
drivers.zipWithIndex.foreach { case (driv, i) =>
val me = idx === UInt(i)
driv.io.start := me && state === s_run
driv.io.mem.acquire.ready := io.mem.acquire.ready && me
driv.io.mem.grant.valid := io.mem.grant.valid && me
driv.io.mem.grant.bits := io.mem.grant.bits
when (me) {
io.mem.acquire.valid := driv.io.mem.acquire.valid
io.mem.acquire.bits := driv.io.mem.acquire.bits
io.mem.grant.ready := driv.io.mem.grant.ready
finished := driv.io.finished
}
}
}

@ -0,0 +1,85 @@
package uncore.unittests
import Chisel._
import junctions._
import junctions.unittests._
import uncore.devices._
import uncore.tilelink._
import uncore.converters._
import cde.Parameters
class SmiConverterTest(implicit val p: Parameters) extends UnitTest
with HasTileLinkParameters {
val outermostParams = p.alterPartial({ case TLId => "Outermost" })
val smiWidth = 32
val smiDepth = 64
val tlDepth = (smiWidth * smiDepth) / tlDataBits
val smimem = Module(new SmiMem(smiWidth, smiDepth))
val conv = Module(new SmiIOTileLinkIOConverter(
smiWidth, log2Up(smiDepth))(outermostParams))
val driver = Module(new DriverSet(
(driverParams: Parameters) => {
implicit val p = driverParams
Seq(
Module(new PutSweepDriver(tlDepth)),
Module(new PutMaskDriver(smiWidth / 8)),
Module(new PutBlockSweepDriver(tlDepth / tlDataBeats)),
Module(new GetMultiWidthDriver))
})(outermostParams))
conv.io.tl <> driver.io.mem
smimem.io <> conv.io.smi
driver.io.start := io.start
io.finished := driver.io.finished
}
class ROMSlaveTest(implicit p: Parameters) extends UnitTest {
implicit val testName = "ROMSlaveTest"
val romdata = Seq(
BigInt("01234567deadbeef", 16),
BigInt("ab32fee8d00dfeed", 16))
val rombytes = romdata.map(_.toByteArray.reverse).flatten
val rom = Module(new ROMSlave(rombytes))
val driver = Module(new DriverSet(
(driverParams: Parameters) => {
implicit val p = driverParams
Seq(
Module(new GetMultiWidthDriver),
Module(new GetSweepDriver(romdata)),
Module(new GetBlockSweepDriver(romdata)))
}))
rom.io <> driver.io.mem
driver.io.start := io.start
io.finished := driver.io.finished
}
class TileLinkRAMTest(implicit val p: Parameters)
extends UnitTest with HasTileLinkParameters {
val depth = 2 * tlDataBeats
val ram = Module(new TileLinkTestRAM(depth))
val driver = Module(new DriverSet(
(driverParams: Parameters) => {
implicit val p = driverParams
Seq(
Module(new PutSweepDriver(depth)),
Module(new PutMaskDriver),
Module(new PutAtomicDriver),
Module(new PutBlockSweepDriver(depth / tlDataBeats)),
Module(new PrefetchDriver),
Module(new GetMultiWidthDriver))
}))
ram.io <> driver.io.mem
driver.io.start := io.start
io.finished := driver.io.finished
}
object UncoreUnitTests {
def apply(implicit p: Parameters): Seq[UnitTest] =
Seq(
Module(new SmiConverterTest),
Module(new ROMSlaveTest),
Module(new TileLinkRAMTest))
}

@ -0,0 +1,105 @@
// See LICENSE for license details.
package uncore.util
import Chisel._
import uncore.tilelink._
import cde.Parameters
import uncore.constants._
class StoreGen(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) {
val size = typ(log2Up(log2Up(maxSize)+1)-1,0)
def misaligned =
(addr & ((UInt(1) << size) - UInt(1))(log2Up(maxSize)-1,0)).orR
def mask = {
var res = UInt(1)
for (i <- 0 until log2Up(maxSize)) {
val upper = Mux(addr(i), res, UInt(0)) | Mux(size >= UInt(i+1), UInt((BigInt(1) << (1 << i))-1), UInt(0))
val lower = Mux(addr(i), UInt(0), res)
res = Cat(upper, lower)
}
res
}
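  // e.g. with maxSize = 8, a 4-byte store (size = 2) at byte address 4
  // yields mask = 0xf0, selecting the upper word of the 64-bit beat.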
protected def genData(i: Int): UInt =
if (i >= log2Up(maxSize)) dat
else Mux(size === UInt(i), Fill(1 << (log2Up(maxSize)-i), dat((8 << i)-1,0)), genData(i+1))
def data = genData(0)
def wordData = genData(2)
}
class StoreGenAligned(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) extends StoreGen(typ, addr, dat, maxSize) {
override def genData(i: Int) = dat
}
class LoadGen(typ: UInt, signed: Bool, addr: UInt, dat: UInt, zero: Bool, maxSize: Int) {
private val size = new StoreGen(typ, addr, dat, maxSize).size
private def genData(logMinSize: Int): UInt = {
var res = dat
for (i <- log2Up(maxSize)-1 to logMinSize by -1) {
val pos = 8 << i
val shifted = Mux(addr(i), res(2*pos-1,pos), res(pos-1,0))
val doZero = Bool(i == 0) && zero
val zeroed = Mux(doZero, UInt(0), shifted)
res = Cat(Mux(size === UInt(i) || doZero, Fill(8*maxSize-pos, signed && zeroed(pos-1)), res(8*maxSize-1,pos)), zeroed)
}
res
}
def wordData = genData(2)
def data = genData(0)
}
class AMOALU(operandBits: Int, rhsIsAligned: Boolean = false)(implicit p: Parameters) extends Module {
require(operandBits == 32 || operandBits == 64)
val io = new Bundle {
val addr = Bits(INPUT, log2Ceil(operandBits/8))
val cmd = Bits(INPUT, M_SZ)
val typ = Bits(INPUT, log2Ceil(log2Ceil(operandBits/8) + 1))
val lhs = Bits(INPUT, operandBits)
val rhs = Bits(INPUT, operandBits)
val out = Bits(OUTPUT, operandBits)
}
val storegen =
if(rhsIsAligned) new StoreGenAligned(io.typ, io.addr, io.rhs, operandBits/8)
else new StoreGen(io.typ, io.addr, io.rhs, operandBits/8)
val rhs = storegen.wordData
val sgned = io.cmd === M_XA_MIN || io.cmd === M_XA_MAX
val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU
val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU
val adder_out =
if (operandBits == 32) io.lhs + rhs
else {
val mask = ~UInt(0,64) ^ (io.addr(2) << 31)
(io.lhs & mask) + (rhs & mask)
}
val less =
if (operandBits == 32) Mux(io.lhs(31) === rhs(31), io.lhs < rhs, Mux(sgned, io.lhs(31), io.rhs(31)))
else {
val word = !io.typ(0)
val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63))
val cmp_rhs = Mux(word && !io.addr(2), rhs(31), rhs(63))
val lt_lo = io.lhs(31,0) < rhs(31,0)
val lt_hi = io.lhs(63,32) < rhs(63,32)
val eq_hi = io.lhs(63,32) === rhs(63,32)
val lt = Mux(word, Mux(io.addr(2), lt_hi, lt_lo), lt_hi || eq_hi && lt_lo)
Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs))
}
val out = Mux(io.cmd === M_XA_ADD, adder_out,
Mux(io.cmd === M_XA_AND, io.lhs & rhs,
Mux(io.cmd === M_XA_OR, io.lhs | rhs,
Mux(io.cmd === M_XA_XOR, io.lhs ^ rhs,
Mux(Mux(less, min, max), io.lhs,
storegen.data)))))
val wmask = FillInterleaved(8, storegen.mask)
io.out := wmask & out | ~wmask & io.lhs
}
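// Example (a sketch; the values are illustrative): a 32-bit atomic add on
// the lower word of a 64-bit operand.
//   val amoalu = Module(new AMOALU(operandBits = 64))
//   amoalu.io.addr := UInt(0)   // word 0 of the doubleword
//   amoalu.io.cmd  := M_XA_ADD
//   amoalu.io.typ  := UInt(2)   // log2(4 bytes)
//   amoalu.io.lhs  := UInt(15)
//   amoalu.io.rhs  := UInt(3)
//   // io.out(31, 0) reads back as 18; the upper word stays io.lhs(63, 32)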

@ -0,0 +1,134 @@
package uncore.util
import Chisel._
import uncore.tilelink._
import cde.Parameters
// Produces 0-width value when counting to 1
class ZCounter(val n: Int) {
val value = Reg(init=UInt(0, log2Ceil(n)))
def inc(): Bool = {
if (n == 1) Bool(true)
else {
val wrap = value === UInt(n-1)
value := Mux(Bool(!isPow2(n)) && wrap, UInt(0), value + UInt(1))
wrap
}
}
}
object ZCounter {
def apply(n: Int) = new ZCounter(n)
def apply(cond: Bool, n: Int): (UInt, Bool) = {
val c = new ZCounter(n)
var wrap: Bool = null
when (cond) { wrap = c.inc() }
(c.value, cond && wrap)
}
}
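// Example (sketch): counts grant beats; when tlDataBeats == 1 the value is
// zero-width and no counting logic is generated.
//   val (beat, beats_done) = ZCounter(io.mem.grant.fire(), tlDataBeats)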
object TwoWayCounter {
def apply(up: Bool, down: Bool, max: Int): UInt = {
val cnt = Reg(init = UInt(0, log2Up(max+1)))
when (up && !down) { cnt := cnt + UInt(1) }
when (down && !up) { cnt := cnt - UInt(1) }
cnt
}
}
class BeatCounterStatus extends Bundle {
val idx = UInt()
val done = Bool()
}
class TwoWayBeatCounterStatus extends Bundle {
val pending = Bool()
val up = new BeatCounterStatus()
val down = new BeatCounterStatus()
}
/** Utility trait containing wiring functions to keep track of how many data beats have
 * been sent or received over a particular [[uncore.TileLinkChannel]] or pair of channels.
*
* Won't count message types that don't have data.
* Used in [[uncore.XactTracker]] and [[uncore.FinishUnit]].
*/
trait HasDataBeatCounters {
type HasBeat = TileLinkChannel with HasTileLinkBeatId
type HasId = TileLinkChannel with HasClientId
/** Returns the current count on this channel and when a message is done
* @param inc increment the counter (usually .valid or .fire())
* @param data the actual channel data
* @param beat count to return for single-beat messages
*/
def connectDataBeatCounter[S <: TileLinkChannel](inc: Bool, data: S, beat: UInt) = {
val multi = data.hasMultibeatData()
val (multi_cnt, multi_done) = Counter(inc && multi, data.tlDataBeats)
val cnt = Mux(multi, multi_cnt, beat)
val done = Mux(multi, multi_done, inc)
(cnt, done)
}
/** Counter for beats on outgoing [[chisel.DecoupledIO]] */
def connectOutgoingDataBeatCounter[T <: TileLinkChannel](
out: DecoupledIO[T],
beat: UInt = UInt(0)): (UInt, Bool) =
connectDataBeatCounter(out.fire(), out.bits, beat)
/** Returns done but not cnt. Use the addr_beat subbundle instead of cnt for beats on
* incoming channels in case of network reordering.
*/
def connectIncomingDataBeatCounter[T <: TileLinkChannel](in: DecoupledIO[T]): Bool =
connectDataBeatCounter(in.fire(), in.bits, UInt(0))._2
  /** Counter for beats on incoming DecoupledIO[LogicalNetworkIO[T]]s; returns only done */
def connectIncomingDataBeatCounterWithHeader[T <: TileLinkChannel](in: DecoupledIO[LogicalNetworkIO[T]]): Bool =
connectDataBeatCounter(in.fire(), in.bits.payload, UInt(0))._2
/** If the network might interleave beats from different messages, we need a Vec of counters,
* one for every outstanding message id that might be interleaved.
*
* @param getId mapping from Message to counter id
*/
def connectIncomingDataBeatCountersWithHeader[T <: TileLinkChannel with HasClientTransactionId](
in: DecoupledIO[LogicalNetworkIO[T]],
entries: Int,
getId: LogicalNetworkIO[T] => UInt): Vec[Bool] = {
Vec((0 until entries).map { i =>
connectDataBeatCounter(in.fire() && getId(in.bits) === UInt(i), in.bits.payload, UInt(0))._2
})
}
  /** Provides counters on two channels, as well as a meta-counter that tracks how many
* messages have been sent over the up channel but not yet responded to over the down channel
*
* @param status bundle of status of the counters
* @param up outgoing channel
* @param down incoming channel
* @param max max number of outstanding ups with no down
* @param beat overrides cnts on single-beat messages
   * @param trackUp whether a given up message should be counted
   * @param trackDown whether a given down message should be counted
   * Rather than returning a tuple, the results (whether there are outstanding
   * messages, plus each direction's beat index and done signal) are written
   * into the status bundle.
*/
def connectTwoWayBeatCounters[T <: TileLinkChannel, S <: TileLinkChannel](
status: TwoWayBeatCounterStatus,
up: DecoupledIO[T],
down: DecoupledIO[S],
max: Int = 1,
beat: UInt = UInt(0),
trackUp: T => Bool = (t: T) => Bool(true),
trackDown: S => Bool = (s: S) => Bool(true)) {
val (up_idx, up_done) = connectDataBeatCounter(up.fire() && trackUp(up.bits), up.bits, beat)
val (dn_idx, dn_done) = connectDataBeatCounter(down.fire() && trackDown(down.bits), down.bits, beat)
val cnt = TwoWayCounter(up_done, dn_done, max)
status.pending := cnt > UInt(0)
status.up.idx := up_idx
status.up.done := up_done
status.down.idx := dn_idx
status.down.done := dn_done
}
}
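// Example (a sketch; io.acquire, io.grant, and nAcquires are illustrative)
// for a module mixing in HasDataBeatCounters:
//   val status = Wire(new TwoWayBeatCounterStatus)
//   connectTwoWayBeatCounters(status, io.acquire, io.grant, max = nAcquires)
//   // status.pending stays high while acquires await their grants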

@ -0,0 +1,56 @@
package uncore.util
import Chisel._
import uncore.tilelink._
import cde.Parameters
/** Struct for describing per-channel queue depths */
case class TileLinkDepths(acq: Int, prb: Int, rel: Int, gnt: Int, fin: Int)
/** Optionally enqueues each [[uncore.TileLinkChannel]] individually */
class TileLinkEnqueuer(depths: TileLinkDepths)(implicit p: Parameters) extends Module {
val io = new Bundle {
val client = new TileLinkIO().flip
val manager = new TileLinkIO
}
io.manager.acquire <> (if(depths.acq > 0) Queue(io.client.acquire, depths.acq) else io.client.acquire)
io.client.probe <> (if(depths.prb > 0) Queue(io.manager.probe, depths.prb) else io.manager.probe)
io.manager.release <> (if(depths.rel > 0) Queue(io.client.release, depths.rel) else io.client.release)
io.client.grant <> (if(depths.gnt > 0) Queue(io.manager.grant, depths.gnt) else io.manager.grant)
io.manager.finish <> (if(depths.fin > 0) Queue(io.client.finish, depths.fin) else io.client.finish)
}
object TileLinkEnqueuer {
def apply(in: TileLinkIO, depths: TileLinkDepths)(implicit p: Parameters): TileLinkIO = {
val t = Module(new TileLinkEnqueuer(depths))
t.io.client <> in
t.io.manager
}
def apply(in: TileLinkIO, depth: Int)(implicit p: Parameters): TileLinkIO = {
apply(in, TileLinkDepths(depth, depth, depth, depth, depth))
}
}
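// Example (sketch): buffer every channel of a client port by one entry.
//   val buffered = TileLinkEnqueuer(client.io.mem, 1)   // `client` is illustrative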
class ClientTileLinkEnqueuer(depths: TileLinkDepths)(implicit p: Parameters) extends Module {
val io = new Bundle {
val inner = new ClientTileLinkIO().flip
val outer = new ClientTileLinkIO
}
io.outer.acquire <> (if(depths.acq > 0) Queue(io.inner.acquire, depths.acq) else io.inner.acquire)
io.inner.probe <> (if(depths.prb > 0) Queue(io.outer.probe, depths.prb) else io.outer.probe)
io.outer.release <> (if(depths.rel > 0) Queue(io.inner.release, depths.rel) else io.inner.release)
io.inner.grant <> (if(depths.gnt > 0) Queue(io.outer.grant, depths.gnt) else io.outer.grant)
io.outer.finish <> (if(depths.fin > 0) Queue(io.inner.finish, depths.fin) else io.inner.finish)
}
object ClientTileLinkEnqueuer {
def apply(in: ClientTileLinkIO, depths: TileLinkDepths)(implicit p: Parameters): ClientTileLinkIO = {
val t = Module(new ClientTileLinkEnqueuer(depths))
t.io.inner <> in
t.io.outer
}
def apply(in: ClientTileLinkIO, depth: Int)(implicit p: Parameters): ClientTileLinkIO = {
apply(in, TileLinkDepths(depth, depth, depth, depth, depth))
}
}

@ -0,0 +1,25 @@
package uncore
import Chisel._
package object util {
implicit class UIntIsOneOf(val x: UInt) extends AnyVal {
def isOneOf(s: Seq[UInt]): Bool = s.map(x === _).reduce(_||_)
def isOneOf(u1: UInt, u2: UInt*): Bool = isOneOf(u1 +: u2.toSeq)
}
implicit class SeqToAugmentedSeq[T <: Data](val x: Seq[T]) extends AnyVal {
def apply(idx: UInt): T = {
if (x.size == 1) {
x.head
} else {
val half = 1 << (log2Ceil(x.size) - 1)
val newIdx = idx & UInt(half - 1)
Mux(idx >= UInt(half), x.drop(half)(newIdx), x.take(half)(newIdx))
}
}
def asUInt(): UInt = Cat(x.map(_.asUInt).reverse)
}
}
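// Examples (sketches; `state`, the enum values, and `sel` are illustrative):
//   state.isOneOf(s_put_req, s_get_req)   // Bool: state equals either value
//   val beats = Seq.fill(4)(Wire(UInt(width = 8)))
//   val picked = beats(sel)               // sel: UInt; muxes into the Scala Seq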

@ -0,0 +1,69 @@
// See LICENSE for license details.
package uncore.util
import Chisel._
import uncore.tilelink._
class FlowThroughSerializer[T <: Bundle with HasTileLinkData](gen: T, n: Int) extends Module {
val io = new Bundle {
val in = Decoupled(gen).flip
val out = Decoupled(gen)
val cnt = UInt(OUTPUT, log2Up(n))
val done = Bool(OUTPUT)
}
val narrowWidth = io.in.bits.data.getWidth / n
require(io.in.bits.data.getWidth % narrowWidth == 0)
if(n == 1) {
io.out <> io.in
io.cnt := UInt(0)
io.done := Bool(true)
} else {
val cnt = Reg(init=UInt(0, width = log2Up(n)))
val wrap = cnt === UInt(n-1)
val rbits = Reg{io.in.bits}
val active = Reg(init=Bool(false))
val shifter = Wire(Vec(n, Bits(width = narrowWidth)))
(0 until n).foreach {
i => shifter(i) := rbits.data((i+1)*narrowWidth-1,i*narrowWidth)
}
io.done := Bool(false)
io.cnt := cnt
io.in.ready := !active
io.out.valid := active || io.in.valid
io.out.bits := io.in.bits
when(!active && io.in.valid) {
when(io.in.bits.hasData()) {
cnt := Mux(io.out.ready, UInt(1), UInt(0))
rbits := io.in.bits
active := Bool(true)
}
io.done := !io.in.bits.hasData()
}
when(active) {
io.out.bits := rbits
io.out.bits.data := shifter(cnt)
when(io.out.ready) {
cnt := cnt + UInt(1)
when(wrap) {
cnt := UInt(0)
io.done := Bool(true)
active := Bool(false)
}
}
}
}
}
object FlowThroughSerializer {
def apply[T <: Bundle with HasTileLinkData](in: DecoupledIO[T], n: Int): DecoupledIO[T] = {
val fs = Module(new FlowThroughSerializer(in.bits, n))
fs.io.in.valid := in.valid
fs.io.in.bits := in.bits
in.ready := fs.io.in.ready
fs.io.out
}
}
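// Example (sketch): split each wide data beat into n narrow beats, e.g. when
// crossing to a narrower interconnect.
//   val narrowed = FlowThroughSerializer(wide.acquire, n)   // `wide` is illustrative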