reorganize: move non-submodule packages into src/main/scala
This commit is contained in:
381
src/main/scala/coreplex/Configs.scala
Normal file
381
src/main/scala/coreplex/Configs.scala
Normal file
@@ -0,0 +1,381 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package coreplex
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import uncore.coherence._
|
||||
import uncore.agents._
|
||||
import uncore.devices._
|
||||
import uncore.converters._
|
||||
import rocket._
|
||||
import rocket.Util._
|
||||
import scala.math.max
|
||||
import scala.collection.mutable.{LinkedHashSet, ListBuffer}
|
||||
import DefaultTestSuites._
|
||||
import cde.{Parameters, Config, Dump, Knob, CDEMatchError}
|
||||
|
||||
/** Small numeric helpers shared by the Config classes in this file. */
object ConfigUtils {
  /** Maximum of one or more Ints.
    *
    * Uses the standard library's `max` on the varargs sequence instead of a
    * manual `reduce` over `scala.math.max`; behavior is identical (both would
    * throw on an empty argument list, which call sites never pass).
    */
  def max_int(values: Int*): Int = values.max
}
|
||||
import ConfigUtils._
|
||||
|
||||
/** Baseline parameterization for a Rocket coreplex.
  *
  * Provides top definitions for every Field the coreplex consumes (memory
  * system geometry, cache shapes, TileLink network parameters, core feature
  * switches) plus default values for the tunable knobs. Derived configs in
  * this file override individual cases and rely on `CDEMatchError` to fall
  * through to these defaults.
  */
class BaseCoreplexConfig extends Config (
  topDefinitions = { (pname,site,here) =>
    type PF = PartialFunction[Any,Any]
    // Two-level lookup: the value bound to `sname` (e.g. CacheName = "L1D")
    // selects a nested partial function, which is then queried for `pname`.
    def findBy(sname:Any):Any = here[PF](site[Any](sname))(pname)
    lazy val innerDataBits = 64
    // Beats per cache block on the inner TileLink network.
    lazy val innerDataBeats = (8 * site(CacheBlockBytes)) / innerDataBits
    pname match {
      //Memory Parameters
      case PAddrBits => 32
      case PgLevels => if (site(XLen) == 64) 3 /* Sv39 */ else 2 /* Sv32 */
      case ASIdBits => 7
      //Params used by all caches (dispatched per-cache via findBy(CacheName))
      case NSets => findBy(CacheName)
      case NWays => findBy(CacheName)
      case RowBits => findBy(CacheName)
      case NTLBEntries => findBy(CacheName)
      case CacheIdBits => findBy(CacheName)
      case SplitMetadata => findBy(CacheName)
      // Per-cache parameter tables selected by CacheName.
      case "L1I" => {
        case NSets => Knob("L1I_SETS") //64
        case NWays => Knob("L1I_WAYS") //4
        case RowBits => site(TLKey("L1toL2")).dataBitsPerBeat
        case NTLBEntries => 8
        case CacheIdBits => 0
        case SplitMetadata => false
      }:PF
      case "L1D" => {
        case NSets => Knob("L1D_SETS") //64
        case NWays => Knob("L1D_WAYS") //4
        case RowBits => site(TLKey("L1toL2")).dataBitsPerBeat
        case NTLBEntries => 8
        case CacheIdBits => 0
        case SplitMetadata => false
      }:PF
      case ECCCode => None
      case Replacer => () => new RandomReplacement(site(NWays))
      //L1InstCache
      case BtbKey => BtbParameters()
      //L1DataCache
      case DCacheKey => DCacheConfig(nMSHRs = site(Knob("L1D_MSHRS")))
      //L2 Memory System Params
      case AmoAluOperandBits => site(XLen)
      case NAcquireTransactors => 7
      case L2StoreDataQueueDepth => 1
      case L2DirectoryRepresentation => new NullRepresentation(site(NTiles))
      // Default coherence manager: a broadcast hub (no L2 cache).
      case BuildL2CoherenceManager => (id: Int, p: Parameters) =>
        Module(new L2BroadcastHub()(p.alterPartial({
          case InnerTLId => "L1toL2"
          case OuterTLId => "L2toMC" })))
      case NCachedTileLinkPorts => 1
      case NUncachedTileLinkPorts => 1
      //Tile Constants
      // Builds the tile generator list; as a side effect, registers the
      // ISA test suites appropriate for the enabled features.
      case BuildTiles => {
        val env = if(site(UseVM)) List("p","v") else List("p")
        site(FPUKey) foreach { case cfg =>
          TestGeneration.addSuite(rv32udBenchmarks)
          TestGeneration.addSuites(env.map(rv64ufNoDiv))
          TestGeneration.addSuites(env.map(rv64udNoDiv))
          if (cfg.divSqrt) {
            TestGeneration.addSuites(env.map(rv64uf))
            TestGeneration.addSuites(env.map(rv64ud))
          }
        }
        if (site(UseAtomics)) TestGeneration.addSuites(env.map(if (site(XLen) == 64) rv64ua else rv32ua))
        if (site(UseCompressed)) TestGeneration.addSuites(env.map(if (site(XLen) == 64) rv64uc else rv32uc))
        val (rvi, rvu) =
          if (site(XLen) == 64) ((if (site(UseVM)) rv64i else rv64pi), rv64u)
          else ((if (site(UseVM)) rv32i else rv32pi), rv32u)
        TestGeneration.addSuites(rvi.map(_("p")))
        TestGeneration.addSuites((if(site(UseVM)) List("v") else List()).flatMap(env => rvu.map(_(env))))
        TestGeneration.addSuite(if (site(UseVM)) benchmarks else emptyBmarks)
        // One RocketTile generator per tile, all parameterized identically.
        List.fill(site(NTiles)){ (r: Bool, p: Parameters) =>
          Module(new RocketTile(resetSignal = r)(p.alterPartial({
            case TLId => "L1toL2"
            case NUncachedTileLinkPorts => 1 + site(RoccNMemChannels)
          })))
        }
      }
      case BuildRoCC => Nil
      case RoccNMemChannels => site(BuildRoCC).map(_.nMemChannels).foldLeft(0)(_ + _)
      case RoccNPTWPorts => site(BuildRoCC).map(_.nPTWPorts).foldLeft(0)(_ + _)
      //Rocket Core Constants
      case CoreInstBits => if (site(UseCompressed)) 16 else 32
      case FetchWidth => if (site(UseCompressed)) 2 else 1
      case RetireWidth => 1
      case UseVM => true
      case UseUser => true
      case UseDebug => true
      case NBreakpoints => 1
      case FastLoadWord => true
      case FastLoadByte => false
      case XLen => 64
      case FPUKey => Some(FPUConfig())
      case MulDivKey => Some(MulDivConfig())
      case UseAtomics => true
      case UseCompressed => true
      case PLICKey => PLICConfig(site(NTiles), site(UseVM), site(NExtInterrupts), 0)
      case DMKey => new DefaultDebugModuleConfig(site(NTiles), site(XLen))
      case NCustomMRWCSRs => 0
      case ResetVector => BigInt(0x1000)
      case MtvecInit => BigInt(0x1010)
      case MtvecWritable => true
      //Uncore Parameters
      case RTCPeriod => 100 // gives 10 MHz RTC assuming 1 GHz uncore clock
      case LNEndpoints => site(TLKey(site(TLId))).nManagers + site(TLKey(site(TLId))).nClients
      case LNHeaderBits => log2Ceil(site(TLKey(site(TLId))).nManagers) +
        log2Up(site(TLKey(site(TLId))).nClients)
      // Inner network: tiles (and external clients) to the L2 agents + MMIO.
      case TLKey("L1toL2") => {
        // MEI suffices when at most one caching client exists.
        val useMEI = site(NTiles) <= 1 && site(NCachedTileLinkPorts) <= 1
        TileLinkParameters(
          coherencePolicy = (
            if (useMEI) new MEICoherence(site(L2DirectoryRepresentation))
            else new MESICoherence(site(L2DirectoryRepresentation))),
          nManagers = site(NBanksPerMemoryChannel)*site(NMemoryChannels) + 1 /* MMIO */,
          nCachingClients = site(NCachedTileLinkPorts),
          nCachelessClients = site(NExternalClients) + site(NUncachedTileLinkPorts),
          maxClientXacts = max_int(
            // L1 cache
            site(DCacheKey).nMSHRs + 1 /* IOMSHR */,
            // RoCC
            if (site(BuildRoCC).isEmpty) 1 else site(RoccMaxTaggedMemXacts)),
          maxClientsPerPort = if (site(BuildRoCC).isEmpty) 1 else 2,
          maxManagerXacts = site(NAcquireTransactors) + 2,
          dataBeats = innerDataBeats,
          dataBits = site(CacheBlockBytes)*8)
      }
      // Outer network: L2 agents to the memory controller.
      case TLKey("L2toMC") =>
        TileLinkParameters(
          coherencePolicy = new MEICoherence(
            new NullRepresentation(site(NBanksPerMemoryChannel))),
          nManagers = 1,
          nCachingClients = site(NBanksPerMemoryChannel),
          nCachelessClients = 0,
          maxClientXacts = 1,
          maxClientsPerPort = site(NAcquireTransactors) + 2,
          maxManagerXacts = 1,
          dataBeats = innerDataBeats,
          dataBits = site(CacheBlockBytes)*8)
      case TLKey("Outermost") => site(TLKey("L2toMC")).copy(
        maxClientXacts = site(NAcquireTransactors) + 2,
        maxClientsPerPort = site(NBanksPerMemoryChannel),
        dataBeats = site(MIFDataBeats))
      // MMIO network: single uncached client into the io address space.
      case TLKey("L2toMMIO") => {
        TileLinkParameters(
          coherencePolicy = new MICoherence(
            new NullRepresentation(site(NBanksPerMemoryChannel))),
          nManagers = site(GlobalAddrMap).subMap("io").numSlaves,
          nCachingClients = 0,
          nCachelessClients = 1,
          maxClientXacts = 4,
          maxClientsPerPort = 1,
          maxManagerXacts = 1,
          dataBeats = innerDataBeats,
          dataBits = site(CacheBlockBytes) * 8)
      }
      case TLKey("MMIO_Outermost") => site(TLKey("L2toMMIO")).copy(dataBeats = site(MIFDataBeats))

      case BootROMFile => "./bootrom/bootrom.img"
      case NTiles => Knob("NTILES")
      case NBanksPerMemoryChannel => Knob("NBANKS_PER_MEM_CHANNEL")
      case BankIdLSB => 0
      case CacheBlockBytes => Dump("CACHE_BLOCK_BYTES", 64)
      case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes))
      case EnableL2Logging => false
      case ExtraCoreplexPorts => (p: Parameters) => new Bundle
      // Ordered list of regression tests run for this configuration.
      case RegressionTestNames => LinkedHashSet(
        "rv64ud-v-fcvt",
        "rv64ud-p-fdiv",
        "rv64ud-v-fadd",
        "rv64uf-v-fadd",
        "rv64um-v-mul",
        "rv64mi-p-breakpoint",
        "rv64uc-v-rvc",
        "rv64ud-v-structural",
        "rv64si-p-wfi",
        "rv64um-v-divw",
        "rv64ua-v-lrsc",
        "rv64ui-v-fence_i",
        "rv64ud-v-fcvt_w",
        "rv64uf-v-fmin",
        "rv64ui-v-sb",
        "rv64ua-v-amomax_d",
        "rv64ud-v-move",
        "rv64ud-v-fclass",
        "rv64ua-v-amoand_d",
        "rv64ua-v-amoxor_d",
        "rv64si-p-sbreak",
        "rv64ud-v-fmadd",
        "rv64uf-v-ldst",
        "rv64um-v-mulh",
        "rv64si-p-dirty")
      case _ => throw new CDEMatchError
    }},
  // Default values for the tunable knobs referenced above.
  knobValues = {
    case "NTILES" => 1
    case "NBANKS_PER_MEM_CHANNEL" => 1
    case "L1D_MSHRS" => 2
    case "L1D_SETS" => 64
    case "L1D_WAYS" => 4
    case "L1I_SETS" => 64
    case "L1I_WAYS" => 4
    case _ => throw new CDEMatchError
  }
)
|
||||
|
||||
/** Sets the NTILES knob, i.e. the number of Rocket tiles to instantiate. */
class WithNCores(n: Int) extends Config(
  knobValues = {
    case "NTILES" => n
    case _ => throw new CDEMatchError
  })
|
||||
|
||||
/** Sets the number of coherence-manager banks per memory channel. */
class WithNBanksPerMemChannel(n: Int) extends Config(
  knobValues = { case "NBANKS_PER_MEM_CHANNEL" => n; case _ => throw new CDEMatchError })
|
||||
|
||||
/** Swaps the default broadcast hub for a banked L2 HellaCache.
  *
  * Introduces the "L2Bank" cache-parameter table (sets derived from total
  * capacity, ways, banks, and block size), a full sharer directory, and knobs
  * for capacity/ways/split-metadata.
  */
class WithL2Cache extends Config(
  (pname,site,here) => pname match {
    case "L2_CAPACITY_IN_KB" => Knob("L2_CAPACITY_IN_KB")
    // Parameter table consulted via findBy(CacheName) when CacheName == "L2Bank".
    case "L2Bank" => {
      // sets = capacity / blockBytes / nBanks / ways
      case NSets => (((here[Int]("L2_CAPACITY_IN_KB")*1024) /
                        site(CacheBlockBytes)) /
                          (site(NBanksPerMemoryChannel)*site(NMemoryChannels))) /
                            site(NWays)
      case NWays => Knob("L2_WAYS")
      case RowBits => site(TLKey(site(TLId))).dataBitsPerBeat
      case CacheIdBits => log2Ceil(site(NMemoryChannels) * site(NBanksPerMemoryChannel))
      case SplitMetadata => Knob("L2_SPLIT_METADATA")
    }: PartialFunction[Any,Any]
    case NAcquireTransactors => 2
    case NSecondaryMisses => 4
    // Track all sharers exactly (enables MESI on the inner network).
    case L2DirectoryRepresentation => new FullRepresentation(site(NTiles))
    case BuildL2CoherenceManager => (id: Int, p: Parameters) =>
      Module(new L2HellaCacheBank()(p.alterPartial({
        case CacheId => id
        case CacheName => "L2Bank"
        case InnerTLId => "L1toL2"
        case OuterTLId => "L2toMC"})))
    case L2Replacer => () => new SeqRandom(site(NWays))
    case _ => throw new CDEMatchError
  },
  knobValues = { case "L2_WAYS" => 8; case "L2_CAPACITY_IN_KB" => 2048; case "L2_SPLIT_METADATA" => false; case _ => throw new CDEMatchError }
)
|
||||
|
||||
/** Replaces the default L2 broadcast hub with the bufferless variant. */
class WithBufferlessBroadcastHub extends Config(
  (pname, site, here) => pname match {
    case BuildL2CoherenceManager => (id: Int, p: Parameters) =>
      Module(new BufferlessBroadcastHub()(p.alterPartial({
        case InnerTLId => "L1toL2"
        case OuterTLId => "L2toMC" })))
    // Consistency fix: every other Config in this file falls through with
    // CDEMatchError so cde keeps searching subsequent configs; a bare match
    // here would raise MatchError on any other pname instead.
    case _ => throw new CDEMatchError
  })
|
||||
|
||||
/**
|
||||
* WARNING!!! IGNORE AT YOUR OWN PERIL!!!
|
||||
*
|
||||
* There is a very restrictive set of conditions under which the stateless
|
||||
* bridge will function properly. There can only be a single tile. This tile
|
||||
* MUST use the blocking data cache (L1D_MSHRS == 0) and MUST NOT have an
|
||||
* uncached channel capable of writes (i.e. a RoCC accelerator).
|
||||
*
|
||||
* This is because the stateless bridge CANNOT generate probes, so if your
|
||||
* system depends on coherence between channels in any way,
|
||||
* DO NOT use this configuration.
|
||||
*/
|
||||
/** Replaces the coherence manager with a stateless manager-to-client bridge
  * and forces a blocking L1 D-cache (L1D_MSHRS = 0), as the bridge requires.
  * See the WARNING comment above for the conditions under which this is safe.
  */
class WithStatelessBridge extends Config (
  topDefinitions = (pname, site, here) => pname match {
    case BuildL2CoherenceManager => (id: Int, p: Parameters) =>
      Module(new ManagerToClientStatelessBridge()(p.alterPartial({
        case InnerTLId => "L1toL2"
        case OuterTLId => "L2toMC" })))
    // Consistency fix: fall through with CDEMatchError like every other
    // Config in this file, rather than raising a bare MatchError.
    case _ => throw new CDEMatchError
  },
  knobValues = {
    case "L1D_MSHRS" => 0
    case _ => throw new CDEMatchError
  }
)
|
||||
|
||||
/** Uses pseudo-LRU replacement for the L2 cache instead of random. */
class WithPLRU extends Config(
  (field, site, _) => field match {
    case L2Replacer => () => new SeqPLRU(site(NSets), site(NWays))
    case _ => throw new CDEMatchError
  })
|
||||
|
||||
/** Sets the total L2 cache capacity, in KiB. */
class WithL2Capacity(size_kb: Int) extends Config(
  knobValues = { case "L2_CAPACITY_IN_KB" => size_kb; case _ => throw new CDEMatchError })
|
||||
|
||||
/** Sets the associativity (number of ways) of the L2 cache. */
class WithNL2Ways(n: Int) extends Config(
  knobValues = { case "L2_WAYS" => n; case _ => throw new CDEMatchError })
|
||||
|
||||
/** Shrinks the design to a 32-bit (RV32) core: no virtual memory, no user
  * mode, no atomics, no FPU; swaps in an RV32 regression-test list.
  */
class WithRV32 extends Config(
  (field, _, _) => field match {
    case XLen => 32
    case UseVM => false
    case UseUser => false
    case UseAtomics => false
    case FPUKey => None
    case RegressionTestNames => LinkedHashSet(
      "rv32mi-p-ma_addr",
      "rv32mi-p-csr",
      "rv32ui-p-sh",
      "rv32ui-p-lh",
      "rv32mi-p-sbreak",
      "rv32ui-p-sll")
    case _ => throw new CDEMatchError
  }
)
|
||||
|
||||
/** Uses a blocking L1 data cache (no miss-status holding registers). */
class WithBlockingL1 extends Config (
  knobValues = { case "L1D_MSHRS" => 0; case _ => throw new CDEMatchError })
|
||||
|
||||
/** Trims per-tile resources for a small build: no FPU, tiny TLB, no BTB
  * entries, fewer acquire transactors, and direct-mapped blocking L1 caches.
  */
class WithSmallCores extends Config (
  topDefinitions = { (field, _, _) => field match {
    case FPUKey => None
    case NTLBEntries => 4
    case BtbKey => BtbParameters(nEntries = 0)
    case NAcquireTransactors => 2
    case _ => throw new CDEMatchError
  }},
  knobValues = {
    case "L1D_SETS" => 64
    case "L1D_WAYS" => 1
    case "L1I_SETS" => 64
    case "L1I_WAYS" => 1
    case "L1D_MSHRS" => 0
    case _ => throw new CDEMatchError
  }
)
|
||||
|
||||
/** Attaches the three example RoCC accelerators to the custom0/1/2 opcode
  * spaces: an accumulator, an address translator (needs one PTW port), and a
  * character counter.
  */
class WithRoccExample extends Config(
  (pname, site, here) => pname match {
    case BuildRoCC => Seq(
      RoccParameters(
        opcodes = OpcodeSet.custom0,
        generator = (p: Parameters) => Module(new AccumulatorExample()(p))),
      RoccParameters(
        opcodes = OpcodeSet.custom1,
        generator = (p: Parameters) => Module(new TranslatorExample()(p)),
        nPTWPorts = 1),
      RoccParameters(
        opcodes = OpcodeSet.custom2,
        generator = (p: Parameters) => Module(new CharacterCountExample()(p))))

    case RoccMaxTaggedMemXacts => 1
    case _ => throw new CDEMatchError
  })
|
||||
|
||||
/** Stores L2 metadata in a separate (split) array. */
class WithSplitL2Metadata extends Config(
  knobValues = {
    case "L2_SPLIT_METADATA" => true
    case _ => throw new CDEMatchError
  })
|
||||
283
src/main/scala/coreplex/Coreplex.scala
Normal file
283
src/main/scala/coreplex/Coreplex.scala
Normal file
@@ -0,0 +1,283 @@
|
||||
package coreplex
|
||||
|
||||
import Chisel._
|
||||
import cde.{Parameters, Field}
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import uncore.coherence._
|
||||
import uncore.agents._
|
||||
import uncore.devices._
|
||||
import uncore.util._
|
||||
import uncore.converters._
|
||||
import rocket._
|
||||
import rocket.Util._
|
||||
import java.nio.{ByteBuffer,ByteOrder}
|
||||
import java.nio.file.{Files, Paths}
|
||||
|
||||
/** Number of memory channels */
case object NMemoryChannels extends Field[Int]
/** Number of banks per memory channel */
case object NBanksPerMemoryChannel extends Field[Int]
/** Least significant bit of address used for bank partitioning */
case object BankIdLSB extends Field[Int]
/** Function for building some kind of coherence manager agent (one per bank) */
case object BuildL2CoherenceManager extends Field[(Int, Parameters) => CoherenceAgent]
/** Function for building some kind of tile connected to a reset signal */
case object BuildTiles extends Field[Seq[(Bool, Parameters) => Tile]]
/** A string describing on-chip devices, readable by target software */
case object ConfigString extends Field[Array[Byte]]
/** Number of external interrupt sources */
case object NExtInterrupts extends Field[Int]
/** Interrupt controller configuration */
case object PLICKey extends Field[PLICConfig]
/** Number of clock cycles per RTC tick */
case object RTCPeriod extends Field[Int]
/** The file to read the BootROM contents from */
case object BootROMFile extends Field[String]
/** Export an external MMIO slave port */
case object ExportMMIOPort extends Field[Boolean]
/** Expose additional TileLink client ports (count of external clients) */
case object NExternalClients extends Field[Int]
/** Extra top-level ports exported from the coreplex */
case object ExtraCoreplexPorts extends Field[Parameters => Bundle]
|
||||
|
||||
/** Convenience accessors for coreplex-level parameters.
  *
  * Mixed into modules that need the common geometry values and the
  * TLId-specialized Parameters views. Everything is lazy so the implicit
  * `p` can be supplied by the mixing class's constructor.
  */
trait HasCoreplexParameters {
  implicit val p: Parameters
  lazy val nTiles = p(NTiles)
  lazy val nCachedTilePorts = p(NCachedTileLinkPorts)
  lazy val nUncachedTilePorts = p(NUncachedTileLinkPorts)
  lazy val nMemChannels = p(NMemoryChannels)
  lazy val nBanksPerMemChannel = p(NBanksPerMemoryChannel)
  lazy val nBanks = nMemChannels*nBanksPerMemChannel
  lazy val lsb = p(BankIdLSB)
  // Parameters views specialized to each TileLink network.
  lazy val innerParams = p.alterPartial({ case TLId => "L1toL2" })
  lazy val outermostParams = p.alterPartial({ case TLId => "Outermost" })
  lazy val outermostMMIOParams = p.alterPartial({ case TLId => "MMIO_Outermost" })
  lazy val nExtClients = p(NExternalClients)
  lazy val exportMMIO = p(ExportMMIOPort)
}
|
||||
|
||||
/** Wrapper around everything that isn't a Tile.
  *
  * Usually this is clocked and/or place-and-routed separately from the Tiles.
  * Instantiates the outer memory system and the MMIO network (PLIC, debug
  * module, PRCI, boot ROM), and wires them to the tile-facing ports.
  */
class Uncore(implicit val p: Parameters) extends Module
    with HasCoreplexParameters {

  val io = new Bundle {
    val mem = Vec(nMemChannels, new ClientUncachedTileLinkIO()(outermostParams))
    val tiles_cached = Vec(nCachedTilePorts, new ClientTileLinkIO).flip
    val tiles_uncached = Vec(nUncachedTilePorts, new ClientUncachedTileLinkIO).flip
    val ext_uncached = Vec(nExtClients, new ClientUncachedTileLinkIO()(innerParams)).flip
    val prci = Vec(nTiles, new PRCITileIO).asOutput
    val mmio = if (exportMMIO) Some(new ClientUncachedTileLinkIO()(outermostMMIOParams)) else None
    val interrupts = Vec(p(NExtInterrupts), Bool()).asInput
    val debug = new DebugBusIO()(p).flip
  }

  // Use the stub memory system when there are no tile ports to serve.
  val outmemsys = if (nCachedTilePorts + nUncachedTilePorts > 0)
    Module(new DefaultOuterMemorySystem) // NoC, LLC and SerDes
  else Module(new DummyOuterMemorySystem)
  outmemsys.io.incoherent foreach (_ := false)
  outmemsys.io.tiles_uncached <> io.tiles_uncached
  outmemsys.io.tiles_cached <> io.tiles_cached
  outmemsys.io.ext_uncached <> io.ext_uncached
  io.mem <> outmemsys.io.mem

  buildMMIONetwork(p.alterPartial({case TLId => "L2toMMIO"}))

  /** Reads the boot ROM image from BootROMFile, patches the config-string
    * pointer at byte offset 12, and appends the config string bytes.
    */
  def makeBootROM()(implicit p: Parameters) = {
    val romdata = Files.readAllBytes(Paths.get(p(BootROMFile)))
    val rom = ByteBuffer.wrap(romdata)

    rom.order(ByteOrder.LITTLE_ENDIAN)

    // for now, have the reset vector jump straight to memory
    val resetToMemDist = p(GlobalAddrMap)("mem").start - p(ResetVector)
    // The jump offset must be 4KiB-aligned (low 12 bits clear).
    require(resetToMemDist == (resetToMemDist.toInt >> 12 << 12))
    val configStringAddr = p(ResetVector).toInt + rom.capacity

    require(rom.getInt(12) == 0,
      "Config string address position should not be occupied by code")
    rom.putInt(12, configStringAddr)
    rom.array() ++ p(ConfigString).toSeq
  }

  /** Builds the MMIO interconnect and attaches the PLIC, debug module,
    * PRCI, boot ROM, and (optionally) the external MMIO port.
    */
  def buildMMIONetwork(implicit p: Parameters) = {
    val ioAddrMap = p(GlobalAddrMap).subMap("io")

    val mmioNetwork = Module(new TileLinkRecursiveInterconnect(1, ioAddrMap))
    mmioNetwork.io.in.head <> outmemsys.io.mmio

    // Level-triggered external interrupts pass through gateways into the PLIC.
    val plic = Module(new PLIC(p(PLICKey)))
    plic.io.tl <> mmioNetwork.port("int:plic")
    for (i <- 0 until io.interrupts.size) {
      val gateway = Module(new LevelGateway)
      gateway.io.interrupt := io.interrupts(i)
      plic.io.devices(i) <> gateway.io.plic
    }

    val debugModule = Module(new DebugModule)
    debugModule.io.tl <> mmioNetwork.port("int:debug")
    debugModule.io.db <> io.debug

    val prci = Module(new PRCI)
    prci.io.tl <> mmioNetwork.port("int:prci")
    io.prci := prci.io.tiles
    prci.io.rtcTick := Counter(p(RTCPeriod)).inc() // placeholder for real RTC

    // Route per-tile interrupt lines (M-mode, optional S-mode, debug).
    for (i <- 0 until nTiles) {
      prci.io.interrupts(i).meip := plic.io.harts(plic.cfg.context(i, 'M'))
      if (p(UseVM))
        prci.io.interrupts(i).seip := plic.io.harts(plic.cfg.context(i, 'S'))
      prci.io.interrupts(i).debug := debugModule.io.debugInterrupts(i)

      io.prci(i).reset := reset
    }

    val bootROM = Module(new ROMSlave(makeBootROM()))
    bootROM.io <> mmioNetwork.port("int:bootrom")

    io.mmio.map { ext => ext <> mmioNetwork.port("ext") }
  }
}
|
||||
|
||||
/** Common interface for the memory system outside the tiles: tile-facing
  * TileLink client ports in, memory channels and an MMIO port out.
  */
abstract class OuterMemorySystem(implicit val p: Parameters)
    extends Module with HasCoreplexParameters {
  val io = new Bundle {
    val tiles_cached = Vec(nCachedTilePorts, new ClientTileLinkIO).flip
    val tiles_uncached = Vec(nUncachedTilePorts, new ClientUncachedTileLinkIO).flip
    val ext_uncached = Vec(nExtClients, new ClientUncachedTileLinkIO()(innerParams)).flip
    // Per-cached-port incoherence indication, forwarded to the managers.
    val incoherent = Vec(nCachedTilePorts, Bool()).asInput
    val mem = Vec(nMemChannels, new ClientUncachedTileLinkIO()(outermostParams))
    val mmio = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => "L2toMMIO"}))
  }
}
|
||||
|
||||
/** Use in place of OuterMemorySystem if there are no clients to connect.
  * Ties off every outbound channel so the ports are never driven.
  */
class DummyOuterMemorySystem(implicit p: Parameters) extends OuterMemorySystem()(p) {
  require(nCachedTilePorts + nUncachedTilePorts + nExtClients == 0)

  io.mem.foreach { tl =>
    tl.acquire.valid := Bool(false)
    tl.grant.ready := Bool(false)
  }

  io.mmio.acquire.valid := Bool(false)
  io.mmio.grant.ready := Bool(false)
}
|
||||
|
||||
/** The whole outer memory hierarchy, including a NoC, some kind of coherence
  * manager agent, and a converter from TileLink to MemIO.
  */
class DefaultOuterMemorySystem(implicit p: Parameters) extends OuterMemorySystem()(p) {
  // Create a simple L1toL2 NoC between the tiles and the banks of outer memory
  // Cached ports are first in client list, making sharerToClientId just an identity function
  // addrToBank is used to hash physical addresses (of cache blocks) to banks (and thereby memory channels)
  def sharerToClientId(sharerId: UInt) = sharerId
  def addrToBank(addr: UInt): UInt = {
    val isMemory = p(GlobalAddrMap).isInRegion("mem", addr << log2Up(p(CacheBlockBytes)))
    // Non-memory (MMIO) traffic is steered to the extra manager at index nBanks.
    Mux(isMemory,
      if (nBanks > 1) addr(lsb + log2Up(nBanks) - 1, lsb) else UInt(0),
      UInt(nBanks))
  }
  val preBuffering = TileLinkDepths(1,1,2,2,0)
  val l1tol2net = Module(new PortedTileLinkCrossbar(addrToBank, sharerToClientId, preBuffering))

  // Create point(s) of coherence serialization
  val managerEndpoints = List.tabulate(nBanks){id => p(BuildL2CoherenceManager)(id, p)}
  managerEndpoints.foreach { _.incoherent := io.incoherent }

  // Extra manager that funnels MMIO traffic out of the coherent domain.
  val mmioManager = Module(new MMIOTileLinkManager()(p.alterPartial({
    case TLId => "L1toL2"
    case InnerTLId => "L1toL2"
    case OuterTLId => "L2toMMIO"
  })))
  io.mmio <> mmioManager.io.outer

  // Wire the tiles to the TileLink client ports of the L1toL2 network,
  // and coherence manager(s) to the other side
  l1tol2net.io.clients_cached <> io.tiles_cached
  l1tol2net.io.clients_uncached <> io.tiles_uncached ++ io.ext_uncached
  l1tol2net.io.managers <> managerEndpoints.map(_.innerTL) :+ mmioManager.io.inner

  // Create a converter between TileLinkIO and MemIO for each channel
  val outerTLParams = p.alterPartial({ case TLId => "L2toMC" })
  val backendBuffering = TileLinkDepths(0,0,0,0,0)

  // TODO: the code to print this stuff should live somewhere else
  println("Generated Address Map")
  for (entry <- p(GlobalAddrMap).flatten) {
    val name = entry.name
    val start = entry.region.start
    val end = entry.region.start + entry.region.size - 1
    println(f"\t$name%s $start%x - $end%x")
  }
  println("Generated Configuration String")
  println(new String(p(ConfigString)))

  val mem_ic = Module(new TileLinkMemoryInterconnect(nBanksPerMemChannel, nMemChannels)(outermostParams))

  // Unwrap each bank's outer port and adapt its width to the memory interconnect.
  for ((bank, icPort) <- managerEndpoints zip mem_ic.io.in) {
    val unwrap = Module(new ClientTileLinkIOUnwrapper()(outerTLParams))
    unwrap.io.in <> ClientTileLinkEnqueuer(bank.outerTL, backendBuffering)(outerTLParams)
    TileLinkWidthAdapter(icPort, unwrap.io.out)
  }

  io.mem <> mem_ic.io.out
}
|
||||
|
||||
/** Base class for the full coreplex (tiles + uncore).
  * Subclasses that expose a test-success output override hasSuccessFlag.
  */
abstract class Coreplex(implicit val p: Parameters) extends Module
    with HasCoreplexParameters {
  class CoreplexIO(implicit val p: Parameters) extends Bundle {
    val mem = Vec(nMemChannels, new ClientUncachedTileLinkIO()(outermostParams))
    val ext_clients = Vec(nExtClients, new ClientUncachedTileLinkIO()(innerParams)).flip
    val mmio = if(p(ExportMMIOPort)) Some(new ClientUncachedTileLinkIO()(outermostMMIOParams)) else None
    val interrupts = Vec(p(NExtInterrupts), Bool()).asInput
    val debug = new DebugBusIO()(p).flip
    // Extra ports are supplied by the configuration (ExtraCoreplexPorts).
    val extra = p(ExtraCoreplexPorts)(p)
    val success: Option[Bool] = if (hasSuccessFlag) Some(Bool(OUTPUT)) else None
  }

  // Overridden by test harness coreplexes that report completion.
  def hasSuccessFlag: Boolean = false
  val io = new CoreplexIO
}
|
||||
|
||||
/** Standard coreplex: instantiates the configured tiles and an Uncore,
  * then wires the tiles' memory ports, resets, and PRCI through the uncore.
  */
class DefaultCoreplex(topParams: Parameters) extends Coreplex()(topParams) {
  // Build an Uncore and a set of Tiles
  val tileResets = Wire(Vec(nTiles, Bool()))
  val tileList = p(BuildTiles).zip(tileResets).map {
    case (tile, rst) => tile(rst, p)
  }
  // Total TileLink port counts across all tiles (tiles may expose several).
  val nCachedPorts = tileList.map(tile => tile.io.cached.size).reduce(_ + _)
  val nUncachedPorts = tileList.map(tile => tile.io.uncached.size).reduce(_ + _)

  val innerTLParams = p.alterPartial({
    case HastiId => "TL"
    case TLId => "L1toL2"
    case NCachedTileLinkPorts => nCachedPorts
    case NUncachedTileLinkPorts => nUncachedPorts
  })

  val uncore = Module(new Uncore()(innerTLParams))

  // Each tile's reset is driven from its PRCI bundle.
  (uncore.io.prci, tileResets, tileList).zipped.foreach {
    case (prci, rst, tile) =>
      rst := prci.reset
      tile.io.prci <> prci
  }

  // Connect the uncore to the tile memory ports, HostIO and MemIO
  uncore.io.tiles_cached <> tileList.map(_.io.cached).flatten
  uncore.io.tiles_uncached <> tileList.map(_.io.uncached).flatten
  uncore.io.interrupts <> io.interrupts
  uncore.io.debug <> io.debug
  uncore.io.ext_uncached <> io.ext_clients
  if (exportMMIO) { io.mmio.get <> uncore.io.mmio.get }
  io.mem <> uncore.io.mem
}
|
||||
|
||||
/** Coreplex whose tiles are ground tests: asserts io.success once every tile
  * that exposes a "success" signal has raised it.
  */
class GroundTestCoreplex(topParams: Parameters) extends DefaultCoreplex(topParams) {
  override def hasSuccessFlag = true
  // AND-reduce the optional per-tile "success" Bools; they are looked up
  // dynamically in each tile's io bundle since Tile doesn't declare the port.
  io.success.get := tileList.flatMap(_.io.elements get "success").map(_.asInstanceOf[Bool]).reduce(_&&_)
}
|
||||
62
src/main/scala/coreplex/DirectGroundTest.scala
Normal file
62
src/main/scala/coreplex/DirectGroundTest.scala
Normal file
@@ -0,0 +1,62 @@
|
||||
package coreplex
|
||||
|
||||
import Chisel._
|
||||
import cde.{Parameters, Field}
|
||||
import groundtest._
|
||||
import uncore.tilelink._
|
||||
import uncore.agents._
|
||||
|
||||
/** When true, the ground test's status bundle is driven out via io.extra. */
case object ExportGroundTestStatus extends Field[Boolean]
|
||||
|
||||
/** Minimal coreplex that connects a single ground test directly to the memory
  * interconnect, bypassing the tiles and uncore entirely. Requires exactly one
  * memory channel, one tile slot, no MMIO export, and no external clients.
  */
class DirectGroundTestCoreplex(topParams: Parameters) extends Coreplex()(topParams) {
  // Not using the debug
  io.debug.req.ready := Bool(false)
  io.debug.resp.valid := Bool(false)

  require(!exportMMIO)
  require(nExtClients == 0)
  require(nMemChannels == 1)
  require(nTiles == 1)

  // Build the test at the outermost TileLink parameterization so its memory
  // ports can drive the interconnect directly.
  val test = p(BuildGroundTest)(outermostParams.alterPartial({
    case GroundTestId => 0
    case CacheName => "L1D"
  }))
  // Direct tests may not use cached or PTW ports, and must expose exactly
  // one uncached port per bank.
  require(test.io.cache.size == 0)
  require(test.io.mem.size == nBanksPerMemChannel)
  require(test.io.ptw.size == 0)

  val mem_ic = Module(new TileLinkMemoryInterconnect(
    nBanksPerMemChannel, nMemChannels)(outermostParams))

  mem_ic.io.in <> test.io.mem
  io.mem <> mem_ic.io.out

  if (p(ExportGroundTestStatus)) {
    val status = io.extra.asInstanceOf[GroundTestStatus]

    // Latch the first terminal event (finish/error/timeout) and its code,
    // then report it steadily on the status outputs.
    val s_running :: s_finished :: s_errored :: s_timeout :: Nil = Enum(Bits(), 4)
    val state = Reg(init = s_running)
    val error_code = Reg(status.error.bits)
    val timeout_code = Reg(status.timeout.bits)
    when (state === s_running) {
      when (test.io.status.finished) { state := s_finished }
      when (test.io.status.error.valid) {
        state := s_errored
        error_code := test.io.status.error.bits
      }
      when (test.io.status.timeout.valid) {
        state := s_timeout
        timeout_code := test.io.status.timeout.bits
      }
    }
    status.finished := (state === s_finished)
    status.error.valid := (state === s_errored)
    status.error.bits := error_code
    status.timeout.valid := (state === s_timeout)
    status.timeout.bits := timeout_code
  }

  override def hasSuccessFlag = true
  io.success.get := test.io.status.finished
}
|
||||
200
src/main/scala/coreplex/TestConfigs.scala
Normal file
200
src/main/scala/coreplex/TestConfigs.scala
Normal file
@@ -0,0 +1,200 @@
|
||||
package coreplex
|
||||
|
||||
import Chisel._
|
||||
import groundtest._
|
||||
import rocket._
|
||||
import uncore.tilelink._
|
||||
import uncore.coherence._
|
||||
import uncore.agents._
|
||||
import uncore.devices.NTiles
|
||||
import uncore.unittests._
|
||||
import junctions._
|
||||
import junctions.unittests._
|
||||
import scala.collection.mutable.LinkedHashSet
|
||||
import cde.{Parameters, Config, Dump, Knob, CDEMatchError}
|
||||
import scala.math.max
|
||||
import ConfigUtils._
|
||||
|
||||
/** Configures comparator ground-test tiles that issue identical request
  * streams to several targets ("mem" and the external test RAM) and compare
  * the responses.
  */
class WithComparator extends Config(
  (pname, site, here) => pname match {
    case GroundTestKey => Seq.fill(site(NTiles)) {
      GroundTestTileSettings(uncached = site(ComparatorKey).targets.size)
    }
    case BuildGroundTest =>
      (p: Parameters) => Module(new ComparatorCore()(p))
    case ComparatorKey => ComparatorParameters(
      targets = Seq("mem", "io:ext:testram").map(name =>
        site(GlobalAddrMap)(name).start.longValue),
      width = 8,
      operations = 1000,
      atomics = site(UseAtomics),
      prefetches = site("COMPARATOR_PREFETCHES"))
    // Bug fix: the parameter key is FPUKey (BaseCoreplexConfig defines
    // `case FPUKey => Some(FPUConfig())` and queries site(FPUKey)); matching
    // on the FPUConfig companion object never overrode that key, so the FPU
    // was not actually disabled.
    case FPUKey => None
    case UseAtomics => false
    case "COMPARATOR_PREFETCHES" => false
    case _ => throw new CDEMatchError
  })
|
||||
|
||||
/** Enables the RISC-V atomics ("A") extension. */
class WithAtomics extends Config(
  (field, _, _) => field match {
    case UseAtomics => true
    case _ => throw new CDEMatchError
  })
|
||||
|
||||
/** Enables prefetch operations in the comparator ground test. */
class WithPrefetches extends Config(
  (field, _, _) => field match {
    case "COMPARATOR_PREFETCHES" => true
    case _ => throw new CDEMatchError
  })
|
||||
|
||||
class WithMemtest extends Config(
|
||||
(pname, site, here) => pname match {
|
||||
case GroundTestKey => Seq.fill(site(NTiles)) {
|
||||
GroundTestTileSettings(1, 1)
|
||||
}
|
||||
case GeneratorKey => GeneratorParameters(
|
||||
maxRequests = 128,
|
||||
startAddress = site(GlobalAddrMap)("mem").start)
|
||||
case BuildGroundTest =>
|
||||
(p: Parameters) => Module(new GeneratorTest()(p))
|
||||
case _ => throw new CDEMatchError
|
||||
})
|
||||
|
||||
class WithNGenerators(nUncached: Int, nCached: Int) extends Config(
|
||||
(pname, site, here) => pname match {
|
||||
case GroundTestKey => Seq.fill(site(NTiles)) {
|
||||
GroundTestTileSettings(nUncached, nCached)
|
||||
}
|
||||
case _ => throw new CDEMatchError
|
||||
})
|
||||
|
||||
class WithCacheFillTest extends Config(
|
||||
(pname, site, here) => pname match {
|
||||
case GroundTestKey => Seq.fill(site(NTiles)) {
|
||||
GroundTestTileSettings(uncached = 1)
|
||||
}
|
||||
case BuildGroundTest =>
|
||||
(p: Parameters) => Module(new CacheFillTest()(p))
|
||||
case _ => throw new CDEMatchError
|
||||
},
|
||||
knobValues = {
|
||||
case "L2_WAYS" => 4
|
||||
case "L2_CAPACITY_IN_KB" => 4
|
||||
case _ => throw new CDEMatchError
|
||||
})
|
||||
|
||||
class WithBroadcastRegressionTest extends Config(
|
||||
(pname, site, here) => pname match {
|
||||
case GroundTestKey => Seq.fill(site(NTiles)) {
|
||||
GroundTestTileSettings(1, 1, maxXacts = 3)
|
||||
}
|
||||
case BuildGroundTest =>
|
||||
(p: Parameters) => Module(new RegressionTest()(p))
|
||||
case GroundTestRegressions =>
|
||||
(p: Parameters) => RegressionTests.broadcastRegressions(p)
|
||||
case _ => throw new CDEMatchError
|
||||
})
|
||||
|
||||
class WithCacheRegressionTest extends Config(
|
||||
(pname, site, here) => pname match {
|
||||
case GroundTestKey => Seq.fill(site(NTiles)) {
|
||||
GroundTestTileSettings(1, 1, maxXacts = 5)
|
||||
}
|
||||
case BuildGroundTest =>
|
||||
(p: Parameters) => Module(new RegressionTest()(p))
|
||||
case GroundTestRegressions =>
|
||||
(p: Parameters) => RegressionTests.cacheRegressions(p)
|
||||
case _ => throw new CDEMatchError
|
||||
})
|
||||
|
||||
class WithNastiConverterTest extends Config(
|
||||
(pname, site, here) => pname match {
|
||||
case GroundTestKey => Seq.fill(site(NTiles)) {
|
||||
GroundTestTileSettings(uncached = 1)
|
||||
}
|
||||
case GeneratorKey => GeneratorParameters(
|
||||
maxRequests = 128,
|
||||
startAddress = site(GlobalAddrMap)("mem").start)
|
||||
case BuildGroundTest =>
|
||||
(p: Parameters) => Module(new NastiConverterTest()(p))
|
||||
case _ => throw new CDEMatchError
|
||||
})
|
||||
|
||||
class WithTraceGen extends Config(
|
||||
topDefinitions = (pname, site, here) => pname match {
|
||||
case GroundTestKey => Seq.fill(site(NTiles)) {
|
||||
GroundTestTileSettings(uncached = 1, cached = 1)
|
||||
}
|
||||
case BuildGroundTest =>
|
||||
(p: Parameters) => Module(new GroundTestTraceGenerator()(p))
|
||||
case GeneratorKey => GeneratorParameters(
|
||||
maxRequests = 256,
|
||||
startAddress = 0)
|
||||
case AddressBag => {
|
||||
val nSets = 32 // L2 NSets
|
||||
val nWays = 1
|
||||
val blockOffset = site(CacheBlockOffsetBits)
|
||||
val baseAddr = site(GlobalAddrMap)("mem").start
|
||||
val nBeats = site(MIFDataBeats)
|
||||
List.tabulate(4 * nWays) { i =>
|
||||
Seq.tabulate(nBeats) { j => (j * 8) + ((i * nSets) << blockOffset) }
|
||||
}.flatten.map(addr => baseAddr + BigInt(addr))
|
||||
}
|
||||
case UseAtomics => true
|
||||
case _ => throw new CDEMatchError
|
||||
},
|
||||
knobValues = {
|
||||
case "L1D_SETS" => 16
|
||||
case "L1D_WAYS" => 1
|
||||
})
|
||||
|
||||
class WithPCIeMockupTest extends Config(
|
||||
(pname, site, here) => pname match {
|
||||
case NTiles => 2
|
||||
case GroundTestKey => Seq(
|
||||
GroundTestTileSettings(1, 1),
|
||||
GroundTestTileSettings(1))
|
||||
case GeneratorKey => GeneratorParameters(
|
||||
maxRequests = 128,
|
||||
startAddress = site(GlobalAddrMap)("mem").start)
|
||||
case BuildGroundTest =>
|
||||
(p: Parameters) => {
|
||||
val id = p(GroundTestId)
|
||||
if (id == 0) Module(new GeneratorTest()(p))
|
||||
else Module(new NastiConverterTest()(p))
|
||||
}
|
||||
case _ => throw new CDEMatchError
|
||||
})
|
||||
|
||||
class WithDirectMemtest extends Config(
|
||||
(pname, site, here) => {
|
||||
val nGens = 8
|
||||
pname match {
|
||||
case GroundTestKey => Seq(GroundTestTileSettings(uncached = nGens))
|
||||
case GeneratorKey => GeneratorParameters(
|
||||
maxRequests = 1024,
|
||||
startAddress = 0)
|
||||
case BuildGroundTest =>
|
||||
(p: Parameters) => Module(new GeneratorTest()(p))
|
||||
case _ => throw new CDEMatchError
|
||||
}
|
||||
})
|
||||
|
||||
class WithDirectComparator extends Config(
|
||||
(pname, site, here) => pname match {
|
||||
case GroundTestKey => Seq.fill(site(NTiles)) {
|
||||
GroundTestTileSettings(uncached = site(ComparatorKey).targets.size)
|
||||
}
|
||||
case BuildGroundTest =>
|
||||
(p: Parameters) => Module(new ComparatorCore()(p))
|
||||
case ComparatorKey => ComparatorParameters(
|
||||
targets = Seq(0L, 0x100L),
|
||||
width = 8,
|
||||
operations = 1000,
|
||||
atomics = site(UseAtomics),
|
||||
prefetches = site("COMPARATOR_PREFETCHES"))
|
||||
case FPUConfig => None
|
||||
case UseAtomics => false
|
||||
case "COMPARATOR_PREFETCHES" => false
|
||||
case _ => throw new CDEMatchError
|
||||
})
|
||||
186
src/main/scala/coreplex/Testing.scala
Normal file
186
src/main/scala/coreplex/Testing.scala
Normal file
@@ -0,0 +1,186 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package coreplex
|
||||
|
||||
import Chisel._
|
||||
import scala.collection.mutable.{LinkedHashSet,LinkedHashMap}
|
||||
import cde.{Parameters, ParameterDump, Config, Field, CDEMatchError}
|
||||
|
||||
case object RegressionTestNames extends Field[LinkedHashSet[String]]
|
||||
|
||||
abstract class RocketTestSuite {
|
||||
val dir: String
|
||||
val makeTargetName: String
|
||||
val names: LinkedHashSet[String]
|
||||
val envName: String
|
||||
def postScript = s"""
|
||||
|
||||
$$(addprefix $$(output_dir)/, $$(addsuffix .hex, $$($makeTargetName))): $$(output_dir)/%.hex: $dir/%.hex
|
||||
\tmkdir -p $$(output_dir)
|
||||
\tln -fs $$< $$@
|
||||
|
||||
$$(addprefix $$(output_dir)/, $$($makeTargetName)): $$(output_dir)/%: $dir/%
|
||||
\tmkdir -p $$(output_dir)
|
||||
\tln -fs $$< $$@
|
||||
|
||||
run-$makeTargetName: $$(addprefix $$(output_dir)/, $$(addsuffix .out, $$($makeTargetName)))
|
||||
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
|
||||
|
||||
run-$makeTargetName-debug: $$(addprefix $$(output_dir)/, $$(addsuffix .vpd, $$($makeTargetName)))
|
||||
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$(patsubst %.vpd,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
|
||||
"""
|
||||
}
|
||||
|
||||
class AssemblyTestSuite(prefix: String, val names: LinkedHashSet[String])(val envName: String) extends RocketTestSuite {
|
||||
val dir = "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/isa"
|
||||
val makeTargetName = prefix + "-" + envName + "-asm-tests"
|
||||
override def toString = s"$makeTargetName = \\\n" + names.map(n => s"\t$prefix-$envName-$n").mkString(" \\\n") + postScript
|
||||
}
|
||||
|
||||
class BenchmarkTestSuite(makePrefix: String, val dir: String, val names: LinkedHashSet[String]) extends RocketTestSuite {
|
||||
val envName = ""
|
||||
val makeTargetName = makePrefix + "-bmark-tests"
|
||||
override def toString = s"$makeTargetName = \\\n" + names.map(n => s"\t$n.riscv").mkString(" \\\n") + postScript
|
||||
}
|
||||
|
||||
class RegressionTestSuite(val names: LinkedHashSet[String]) extends RocketTestSuite {
|
||||
val envName = ""
|
||||
val dir = "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/isa"
|
||||
val makeTargetName = "regression-tests"
|
||||
override def toString = s"$makeTargetName = \\\n" + names.mkString(" \\\n")
|
||||
}
|
||||
|
||||
object TestGeneration {
|
||||
import scala.collection.mutable.HashMap
|
||||
val asmSuites = new LinkedHashMap[String,AssemblyTestSuite]()
|
||||
val bmarkSuites = new LinkedHashMap[String,BenchmarkTestSuite]()
|
||||
val regressionSuites = new LinkedHashMap[String,RegressionTestSuite]()
|
||||
|
||||
def addSuite(s: RocketTestSuite) {
|
||||
s match {
|
||||
case a: AssemblyTestSuite => asmSuites += (a.makeTargetName -> a)
|
||||
case b: BenchmarkTestSuite => bmarkSuites += (b.makeTargetName -> b)
|
||||
case r: RegressionTestSuite => regressionSuites += (r.makeTargetName -> r)
|
||||
}
|
||||
}
|
||||
|
||||
def addSuites(s: Seq[RocketTestSuite]) { s.foreach(addSuite) }
|
||||
|
||||
def generateMakefrag(topModuleName: String, configClassName: String) {
|
||||
def gen(kind: String, s: Seq[RocketTestSuite]) = {
|
||||
if(s.length > 0) {
|
||||
val envs = s.groupBy(_.envName)
|
||||
val targets = s.map(t => s"$$(${t.makeTargetName})").mkString(" ")
|
||||
s.map(_.toString).mkString("\n") +
|
||||
envs.filterKeys(_ != "").map( {
|
||||
case (env,envsuites) => {
|
||||
val suites = envsuites.map(t => s"$$(${t.makeTargetName})").mkString(" ")
|
||||
s"""
|
||||
run-$kind-$env-tests: $$(addprefix $$(output_dir)/, $$(addsuffix .out, $suites))
|
||||
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
|
||||
run-$kind-$env-tests-debug: $$(addprefix $$(output_dir)/, $$(addsuffix .vpd, $suites))
|
||||
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$(patsubst %.vpd,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
|
||||
run-$kind-$env-tests-fast: $$(addprefix $$(output_dir)/, $$(addsuffix .run, $suites))
|
||||
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
|
||||
"""} } ).mkString("\n") + s"""
|
||||
run-$kind-tests: $$(addprefix $$(output_dir)/, $$(addsuffix .out, $targets))
|
||||
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
|
||||
run-$kind-tests-debug: $$(addprefix $$(output_dir)/, $$(addsuffix .vpd, $targets))
|
||||
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$(patsubst %.vpd,%.out,$$^) /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
|
||||
run-$kind-tests-fast: $$(addprefix $$(output_dir)/, $$(addsuffix .run, $targets))
|
||||
\t@echo; perl -ne 'print " [$$$$1] $$$$ARGV \\t$$$$2\\n" if( /\\*{3}(.{8})\\*{3}(.*)/ || /ASSERTION (FAILED)/i )' $$^ /dev/null | perl -ne 'if(/(.*)/){print "$$$$1\\n\\n"; exit(1) if eof()}'
|
||||
"""
|
||||
} else { "\n" }
|
||||
}
|
||||
|
||||
val f = createOutputFile(s"$topModuleName.$configClassName.d")
|
||||
f.write(
|
||||
List(
|
||||
gen("asm", asmSuites.values.toSeq),
|
||||
gen("bmark", bmarkSuites.values.toSeq),
|
||||
gen("regression", regressionSuites.values.toSeq)
|
||||
).mkString("\n"))
|
||||
f.close
|
||||
}
|
||||
|
||||
def createOutputFile(name: String) =
|
||||
new java.io.FileWriter(s"${Driver.targetDir}/$name")
|
||||
}
|
||||
|
||||
object DefaultTestSuites {
|
||||
val rv32uiNames = LinkedHashSet(
|
||||
"simple", "add", "addi", "and", "andi", "auipc", "beq", "bge", "bgeu", "blt", "bltu", "bne", "fence_i",
|
||||
"jal", "jalr", "lb", "lbu", "lh", "lhu", "lui", "lw", "or", "ori", "sb", "sh", "sw", "sll", "slli",
|
||||
"slt", "slti", "sra", "srai", "srl", "srli", "sub", "xor", "xori")
|
||||
val rv32ui = new AssemblyTestSuite("rv32ui", rv32uiNames)(_)
|
||||
|
||||
val rv32ucNames = LinkedHashSet("rvc")
|
||||
val rv32uc = new AssemblyTestSuite("rv32uc", rv32ucNames)(_)
|
||||
|
||||
val rv32umNames = LinkedHashSet("mul", "mulh", "mulhsu", "mulhu", "div", "divu", "rem", "remu")
|
||||
val rv32um = new AssemblyTestSuite("rv32um", rv32umNames)(_)
|
||||
|
||||
val rv32uaNames = LinkedHashSet("lrsc", "amoadd_w", "amoand_w", "amoor_w", "amoxor_w", "amoswap_w", "amomax_w", "amomaxu_w", "amomin_w", "amominu_w")
|
||||
val rv32ua = new AssemblyTestSuite("rv32ua", rv32uaNames)(_)
|
||||
|
||||
val rv32siNames = LinkedHashSet("csr", "ma_fetch", "scall", "sbreak", "wfi", "dirty")
|
||||
val rv32si = new AssemblyTestSuite("rv32si", rv32siNames)(_)
|
||||
|
||||
val rv32miNames = LinkedHashSet("csr", "mcsr", "illegal", "ma_addr", "ma_fetch", "sbreak", "scall")
|
||||
val rv32mi = new AssemblyTestSuite("rv32mi", rv32miNames)(_)
|
||||
|
||||
val rv32u = List(rv32ui, rv32um)
|
||||
val rv32i = List(rv32ui, rv32si, rv32mi)
|
||||
val rv32pi = List(rv32ui, rv32mi)
|
||||
|
||||
val rv64uiNames = LinkedHashSet("addw", "addiw", "ld", "lwu", "sd", "slliw", "sllw", "sltiu", "sltu", "sraiw", "sraw", "srliw", "srlw", "subw")
|
||||
val rv64ui = new AssemblyTestSuite("rv64ui", rv32uiNames ++ rv64uiNames)(_)
|
||||
|
||||
val rv64umNames = LinkedHashSet("divuw", "divw", "mulw", "remuw", "remw")
|
||||
val rv64um = new AssemblyTestSuite("rv64um", rv32umNames ++ rv64umNames)(_)
|
||||
|
||||
val rv64uaNames = rv32uaNames.map(_.replaceAll("_w","_d"))
|
||||
val rv64ua = new AssemblyTestSuite("rv64ua", rv32uaNames ++ rv64uaNames)(_)
|
||||
|
||||
val rv64ucNames = rv32ucNames
|
||||
val rv64uc = new AssemblyTestSuite("rv64uc", rv64ucNames)(_)
|
||||
|
||||
val rv64ufNames = LinkedHashSet("ldst", "move", "fsgnj", "fcmp", "fcvt", "fcvt_w", "fclass", "fadd", "fdiv", "fmin", "fmadd")
|
||||
val rv64uf = new AssemblyTestSuite("rv64uf", rv64ufNames)(_)
|
||||
val rv64ufNoDiv = new AssemblyTestSuite("rv64uf", rv64ufNames - "fdiv")(_)
|
||||
|
||||
val rv64udNames = rv64ufNames + "structural"
|
||||
val rv64ud = new AssemblyTestSuite("rv64ud", rv64udNames)(_)
|
||||
val rv64udNoDiv = new AssemblyTestSuite("rv64ud", rv64udNames - "fdiv")(_)
|
||||
|
||||
val rv64siNames = rv32siNames
|
||||
val rv64si = new AssemblyTestSuite("rv64si", rv64siNames)(_)
|
||||
|
||||
val rv64miNames = rv32miNames + "breakpoint"
|
||||
val rv64mi = new AssemblyTestSuite("rv64mi", rv64miNames)(_)
|
||||
|
||||
val groundtestNames = LinkedHashSet("simple")
|
||||
val groundtest64 = new AssemblyTestSuite("rv64ui", groundtestNames)(_)
|
||||
val groundtest32 = new AssemblyTestSuite("rv32ui", groundtestNames)(_)
|
||||
|
||||
// TODO: "rv64ui-pm-lrsc", "rv64mi-pm-ipi",
|
||||
|
||||
val rv64u = List(rv64ui, rv64um)
|
||||
val rv64i = List(rv64ui, rv64si, rv64mi)
|
||||
val rv64pi = List(rv64ui, rv64mi)
|
||||
|
||||
val benchmarks = new BenchmarkTestSuite("rvi", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet(
|
||||
"median", "multiply", "qsort", "towers", "vvadd", "dhrystone", "mt-matmul"))
|
||||
|
||||
val rv32udBenchmarks = new BenchmarkTestSuite("rvd", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet(
|
||||
"mm", "spmv", "mt-vvadd"))
|
||||
|
||||
val emptyBmarks = new BenchmarkTestSuite("empty",
|
||||
"$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet.empty)
|
||||
|
||||
val mtBmarks = new BenchmarkTestSuite("mt", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/mt",
|
||||
LinkedHashSet(((0 to 4).map("vvadd"+_) ++
|
||||
List("ad","ae","af","ag","ai","ak","al","am","an","ap","aq","ar","at","av","ay","az",
|
||||
"bb","bc","bf","bh","bj","bk","bm","bo","br","bs","ce","cf","cg","ci","ck","cl",
|
||||
"cm","cs","cv","cy","dc","df","dm","do","dr","ds","du","dv").map(_+"_matmul")): _*))
|
||||
}
|
||||
22
src/main/scala/coreplex/UnitTest.scala
Normal file
22
src/main/scala/coreplex/UnitTest.scala
Normal file
@@ -0,0 +1,22 @@
|
||||
package coreplex
|
||||
|
||||
import Chisel._
|
||||
import junctions.unittests.UnitTestSuite
|
||||
import rocket.Tile
|
||||
import uncore.tilelink.TLId
|
||||
import cde.Parameters
|
||||
|
||||
class UnitTestCoreplex(topParams: Parameters) extends Coreplex()(topParams) {
|
||||
require(!exportMMIO)
|
||||
require(nExtClients == 0)
|
||||
require(nMemChannels == 0)
|
||||
|
||||
io.debug.req.ready := Bool(false)
|
||||
io.debug.resp.valid := Bool(false)
|
||||
|
||||
val l1params = p.alterPartial({ case TLId => "L1toL2" })
|
||||
val tests = Module(new UnitTestSuite()(l1params))
|
||||
|
||||
override def hasSuccessFlag = true
|
||||
io.success.get := tests.io.finished
|
||||
}
|
||||
115
src/main/scala/groundtest/BusMasterTest.scala
Normal file
115
src/main/scala/groundtest/BusMasterTest.scala
Normal file
@@ -0,0 +1,115 @@
|
||||
package groundtest
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.agents._
|
||||
import uncore.coherence.{InnerTLId, OuterTLId}
|
||||
import uncore.util._
|
||||
import junctions.HasAddrMapParameters
|
||||
import cde.Parameters
|
||||
|
||||
/**
|
||||
* An example bus mastering devices that writes some preset data to memory.
|
||||
* When it receives an MMIO put request, it starts writing out the data.
|
||||
* When it receives an MMIO get request, it responds with the progress of
|
||||
* the write. A grant data of 1 means it is still writing, grant data 0
|
||||
* means it has finished.
|
||||
*/
|
||||
class ExampleBusMaster(implicit val p: Parameters) extends Module
|
||||
with HasAddrMapParameters
|
||||
with HasTileLinkParameters {
|
||||
val mmioParams = p.alterPartial({ case TLId => p(InnerTLId) })
|
||||
val memParams = p.alterPartial({ case TLId => p(OuterTLId) })
|
||||
val memStart = addrMap("mem").start
|
||||
val memStartBlock = memStart >> p(CacheBlockOffsetBits)
|
||||
|
||||
val io = new Bundle {
|
||||
val mmio = new ClientUncachedTileLinkIO()(mmioParams).flip
|
||||
val mem = new ClientUncachedTileLinkIO()(memParams)
|
||||
}
|
||||
|
||||
val s_idle :: s_put :: s_resp :: Nil = Enum(Bits(), 3)
|
||||
val state = Reg(init = s_idle)
|
||||
val send_resp = Reg(init = Bool(false))
|
||||
val r_acq = Reg(new AcquireMetadata)
|
||||
|
||||
io.mmio.acquire.ready := !send_resp
|
||||
io.mmio.grant.valid := send_resp
|
||||
io.mmio.grant.bits := Grant(
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = r_acq.getBuiltInGrantType(),
|
||||
client_xact_id = r_acq.client_xact_id,
|
||||
manager_xact_id = UInt(0),
|
||||
addr_beat = r_acq.addr_beat,
|
||||
data = Mux(state === s_idle, UInt(0), UInt(1)))
|
||||
|
||||
when (io.mmio.acquire.fire()) {
|
||||
send_resp := Bool(true)
|
||||
r_acq := io.mmio.acquire.bits
|
||||
when (state === s_idle && io.mmio.acquire.bits.hasData()) { state := s_put }
|
||||
}
|
||||
when (io.mmio.grant.fire()) { send_resp := Bool(false) }
|
||||
|
||||
val (put_beat, put_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
|
||||
when (put_done) { state := s_resp }
|
||||
when (io.mem.grant.fire()) { state := s_idle }
|
||||
|
||||
io.mem.acquire.valid := state === s_put
|
||||
io.mem.acquire.bits := PutBlock(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(memStartBlock),
|
||||
addr_beat = put_beat,
|
||||
data = put_beat)
|
||||
io.mem.grant.ready := state === s_resp
|
||||
}
|
||||
|
||||
class BusMasterTest(implicit p: Parameters) extends GroundTest()(p)
|
||||
with HasTileLinkParameters {
|
||||
val (s_idle :: s_req_start :: s_resp_start :: s_req_poll :: s_resp_poll ::
|
||||
s_req_check :: s_resp_check :: s_done :: Nil) = Enum(Bits(), 8)
|
||||
val state = Reg(init = s_idle)
|
||||
|
||||
val busMasterBlock = addrMap("io:ext:busmaster").start >> p(CacheBlockOffsetBits)
|
||||
val start_acq = Put(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(busMasterBlock),
|
||||
addr_beat = UInt(0),
|
||||
data = UInt(1))
|
||||
val poll_acq = Get(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(busMasterBlock),
|
||||
addr_beat = UInt(0))
|
||||
val check_acq = GetBlock(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(memStartBlock))
|
||||
|
||||
val acq = io.mem.head.acquire
|
||||
val gnt = io.mem.head.grant
|
||||
|
||||
acq.valid := state.isOneOf(s_req_start, s_req_poll, s_req_check)
|
||||
acq.bits := MuxLookup(state, check_acq, Seq(
|
||||
s_req_start -> start_acq,
|
||||
s_req_poll -> poll_acq))
|
||||
gnt.ready := state.isOneOf(s_resp_start, s_resp_poll, s_resp_check)
|
||||
|
||||
val (get_beat, get_done) = Counter(
|
||||
state === s_resp_check && gnt.valid, tlDataBeats)
|
||||
|
||||
when (state === s_idle) { state := s_req_start }
|
||||
when (state === s_req_start && acq.ready) { state := s_resp_start }
|
||||
when (state === s_resp_start && gnt.valid) { state := s_req_poll }
|
||||
when (state === s_req_poll && acq.ready) { state := s_resp_poll }
|
||||
when (state === s_resp_poll && gnt.valid) {
|
||||
when (gnt.bits.data === UInt(0)) {
|
||||
state := s_req_check
|
||||
} .otherwise { state := s_req_poll }
|
||||
}
|
||||
when (state === s_req_check && acq.ready) { state := s_resp_check }
|
||||
when (get_done) { state := s_done }
|
||||
|
||||
io.status.finished := state === s_done
|
||||
|
||||
assert(state =/= s_resp_check || !gnt.valid ||
|
||||
gnt.bits.data === get_beat,
|
||||
"BusMasterTest: data does not match")
|
||||
}
|
||||
50
src/main/scala/groundtest/CacheFillTest.scala
Normal file
50
src/main/scala/groundtest/CacheFillTest.scala
Normal file
@@ -0,0 +1,50 @@
|
||||
package groundtest
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import uncore.agents._
|
||||
import uncore.util._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
class CacheFillTest(implicit p: Parameters) extends GroundTest()(p)
|
||||
with HasTileLinkParameters {
|
||||
val capacityKb: Int = p("L2_CAPACITY_IN_KB")
|
||||
val nblocks = capacityKb * 1024 / p(CacheBlockBytes)
|
||||
val s_start :: s_prefetch :: s_retrieve :: s_finished :: Nil = Enum(Bits(), 4)
|
||||
val state = Reg(init = s_start)
|
||||
|
||||
val active = state.isOneOf(s_prefetch, s_retrieve)
|
||||
|
||||
val xact_pending = Reg(init = UInt(0, tlMaxClientXacts))
|
||||
val xact_id = PriorityEncoder(~xact_pending)
|
||||
|
||||
val (req_block, round_done) = Counter(io.mem.head.acquire.fire(), nblocks)
|
||||
|
||||
io.mem.head.acquire.valid := active && !xact_pending.andR
|
||||
io.mem.head.acquire.bits := Mux(state === s_prefetch,
|
||||
GetPrefetch(xact_id, UInt(memStartBlock) + req_block),
|
||||
GetBlock(xact_id, UInt(memStartBlock) + req_block))
|
||||
io.mem.head.grant.ready := xact_pending.orR
|
||||
|
||||
def add_pending(acq: DecoupledIO[Acquire]): UInt =
|
||||
Mux(acq.fire(), UIntToOH(acq.bits.client_xact_id), UInt(0))
|
||||
|
||||
def remove_pending(gnt: DecoupledIO[Grant]): UInt = {
|
||||
val last_grant = !gnt.bits.hasMultibeatData() ||
|
||||
gnt.bits.addr_beat === UInt(tlDataBeats - 1)
|
||||
~Mux(gnt.fire() && last_grant, UIntToOH(gnt.bits.client_xact_id), UInt(0))
|
||||
}
|
||||
|
||||
xact_pending := (xact_pending |
|
||||
add_pending(io.mem.head.acquire)) &
|
||||
remove_pending(io.mem.head.grant)
|
||||
|
||||
when (state === s_start) { state := s_prefetch }
|
||||
when (state === s_prefetch && round_done) { state := s_retrieve }
|
||||
when (state === s_retrieve && round_done) { state := s_finished }
|
||||
|
||||
io.status.finished := (state === s_finished)
|
||||
io.status.timeout.valid := Bool(false)
|
||||
io.status.error.valid := Bool(false)
|
||||
}
|
||||
387
src/main/scala/groundtest/Comparator.scala
Normal file
387
src/main/scala/groundtest/Comparator.scala
Normal file
@@ -0,0 +1,387 @@
|
||||
package groundtest
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import junctions._
|
||||
import rocket._
|
||||
import scala.util.Random
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
case class ComparatorParameters(
|
||||
targets: Seq[Long],
|
||||
width: Int,
|
||||
operations: Int,
|
||||
atomics: Boolean,
|
||||
prefetches: Boolean)
|
||||
case object ComparatorKey extends Field[ComparatorParameters]
|
||||
|
||||
trait HasComparatorParameters {
|
||||
implicit val p: Parameters
|
||||
val comparatorParams = p(ComparatorKey)
|
||||
val targets = comparatorParams.targets
|
||||
val nTargets = targets.size
|
||||
val targetWidth = comparatorParams.width
|
||||
val nOperations = comparatorParams.operations
|
||||
val atomics = comparatorParams.atomics
|
||||
val prefetches = comparatorParams.prefetches
|
||||
}
|
||||
|
||||
object LFSR64
|
||||
{
|
||||
private var counter = 0
|
||||
private def next: Int = {
|
||||
counter += 1
|
||||
counter
|
||||
}
|
||||
|
||||
def apply(increment: Bool = Bool(true), seed: Int = next): UInt =
|
||||
{
|
||||
val wide = 64
|
||||
val lfsr = RegInit(UInt((seed * 0xDEADBEEFCAFEBAB1L) >>> 1, width = wide))
|
||||
val xor = lfsr(0) ^ lfsr(1) ^ lfsr(3) ^ lfsr(4)
|
||||
when (increment) { lfsr := Cat(xor, lfsr(wide-1,1)) }
|
||||
lfsr
|
||||
}
|
||||
}
|
||||
|
||||
object NoiseMaker
|
||||
{
|
||||
def apply(wide: Int, increment: Bool = Bool(true)): UInt = {
|
||||
val lfsrs = Seq.fill((wide+63)/64) { LFSR64(increment) }
|
||||
Cat(lfsrs)(wide-1,0)
|
||||
}
|
||||
}
|
||||
|
||||
object MaskMaker
|
||||
{
|
||||
def apply(wide: Int, bits: UInt): UInt =
|
||||
Vec.tabulate(wide) {UInt(_) < bits} .asUInt
|
||||
}
|
||||
|
||||
class ComparatorSource(implicit val p: Parameters) extends Module
|
||||
with HasComparatorParameters
|
||||
with HasTileLinkParameters
|
||||
{
|
||||
val io = new Bundle {
|
||||
val out = Decoupled(new Acquire)
|
||||
val finished = Bool(OUTPUT)
|
||||
}
|
||||
|
||||
// Output exactly nOperations of Acquires
|
||||
val finished = RegInit(Bool(false))
|
||||
val valid = RegInit(Bool(false))
|
||||
|
||||
valid := Bool(true)
|
||||
|
||||
io.finished := finished
|
||||
io.out.valid := !finished && valid
|
||||
|
||||
// Generate random operand sizes
|
||||
val inc = io.out.fire()
|
||||
val raw_operand_size = NoiseMaker(2, inc) | UInt(0, M_SZ)
|
||||
val max_operand_size = UInt(log2Up(tlDataBytes))
|
||||
val get_operand_size = Mux(raw_operand_size > max_operand_size, max_operand_size, raw_operand_size)
|
||||
val atomic_operand_size = UInt(2) + NoiseMaker(1, inc) // word or dword
|
||||
|
||||
// Generate random, but valid addr_bytes
|
||||
val raw_addr_byte = NoiseMaker(tlByteAddrBits, inc)
|
||||
val get_addr_byte = raw_addr_byte & ~MaskMaker(tlByteAddrBits, get_operand_size)
|
||||
val atomic_addr_byte = raw_addr_byte & ~MaskMaker(tlByteAddrBits, atomic_operand_size)
|
||||
|
||||
// Only allow some of the possible choices (M_XA_MAXU untested)
|
||||
val atomic_opcode = MuxLookup(NoiseMaker(3, inc), M_XA_SWAP, Array(
|
||||
UInt("b000") -> M_XA_ADD,
|
||||
UInt("b001") -> M_XA_XOR,
|
||||
UInt("b010") -> M_XA_OR,
|
||||
UInt("b011") -> M_XA_AND,
|
||||
UInt("b100") -> M_XA_MIN,
|
||||
UInt("b101") -> M_XA_MAX,
|
||||
UInt("b110") -> M_XA_MINU,
|
||||
UInt("b111") -> M_XA_SWAP))
|
||||
|
||||
// Addr_block range
|
||||
val addr_block_mask = MaskMaker(tlBlockAddrBits, UInt(targetWidth-tlBeatAddrBits-tlByteAddrBits))
|
||||
|
||||
// Generate some random values
|
||||
val addr_block = NoiseMaker(tlBlockAddrBits, inc) & addr_block_mask
|
||||
val addr_beat = NoiseMaker(tlBeatAddrBits, inc)
|
||||
val wmask = NoiseMaker(tlDataBytes, inc)
|
||||
val data = NoiseMaker(tlDataBits, inc)
|
||||
val client_xact_id = UInt(0) // filled by Client
|
||||
|
||||
// Random transactions
|
||||
val get = Get(client_xact_id, addr_block, addr_beat, get_addr_byte, get_operand_size, Bool(false))
|
||||
val getBlock = GetBlock(client_xact_id, addr_block)
|
||||
val put = Put(client_xact_id, addr_block, addr_beat, data, Some(wmask))
|
||||
val putBlock = PutBlock(client_xact_id, addr_block, UInt(0), data)
|
||||
val putAtomic = if (atomics)
|
||||
PutAtomic(client_xact_id, addr_block, addr_beat,
|
||||
atomic_addr_byte, atomic_opcode, atomic_operand_size, data)
|
||||
else put
|
||||
val putPrefetch = if (prefetches)
|
||||
PutPrefetch(client_xact_id, addr_block)
|
||||
else put
|
||||
val getPrefetch = if (prefetches)
|
||||
GetPrefetch(client_xact_id, addr_block)
|
||||
else get
|
||||
val a_type_sel = NoiseMaker(3, inc)
|
||||
|
||||
// We must initially putBlock all of memory to have a consistent starting state
|
||||
val final_addr_block = addr_block_mask + UInt(1)
|
||||
val wipe_addr_block = RegInit(UInt(0, width = tlBlockAddrBits))
|
||||
val done_wipe = wipe_addr_block === final_addr_block
|
||||
|
||||
io.out.bits := Mux(!done_wipe,
|
||||
// Override whatever else we were going to do if we are wiping
|
||||
PutBlock(client_xact_id, wipe_addr_block, UInt(0), data),
|
||||
// Generate a random a_type
|
||||
MuxLookup(a_type_sel, get, Array(
|
||||
UInt("b000") -> get,
|
||||
UInt("b001") -> getBlock,
|
||||
UInt("b010") -> put,
|
||||
UInt("b011") -> putBlock,
|
||||
UInt("b100") -> putAtomic,
|
||||
UInt("b101") -> getPrefetch,
|
||||
UInt("b110") -> putPrefetch)))
|
||||
|
||||
val idx = Reg(init = UInt(0, log2Up(nOperations)))
|
||||
when (io.out.fire()) {
|
||||
when (idx === UInt(nOperations - 1)) { finished := Bool(true) }
|
||||
when (!done_wipe) {
|
||||
printf("[acq %d]: PutBlock(addr_block = %x, data = %x)\n",
|
||||
idx, wipe_addr_block, data)
|
||||
wipe_addr_block := wipe_addr_block + UInt(1)
|
||||
} .otherwise {
|
||||
switch (a_type_sel) {
|
||||
is (UInt("b000")) {
|
||||
printf("[acq %d]: Get(addr_block = %x, addr_beat = %x, addr_byte = %x, op_size = %x)\n",
|
||||
idx, addr_block, addr_beat, get_addr_byte, get_operand_size)
|
||||
}
|
||||
is (UInt("b001")) {
|
||||
printf("[acq %d]: GetBlock(addr_block = %x)\n", idx, addr_block)
|
||||
}
|
||||
is (UInt("b010")) {
|
||||
printf("[acq %d]: Put(addr_block = %x, addr_beat = %x, data = %x, wmask = %x)\n",
|
||||
idx, addr_block, addr_beat, data, wmask)
|
||||
}
|
||||
is (UInt("b011")) {
|
||||
printf("[acq %d]: PutBlock(addr_block = %x, data = %x)\n", idx, addr_block, data)
|
||||
}
|
||||
is (UInt("b100")) {
|
||||
if (atomics) {
|
||||
printf("[acq %d]: PutAtomic(addr_block = %x, addr_beat = %x, addr_byte = %x, " +
|
||||
"opcode = %x, op_size = %x, data = %x)\n",
|
||||
idx, addr_block, addr_beat, atomic_addr_byte,
|
||||
atomic_opcode, atomic_operand_size, data)
|
||||
} else {
|
||||
printf("[acq %d]: Put(addr_block = %x, addr_beat = %x, data = %x, wmask = %x)\n",
|
||||
idx, addr_block, addr_beat, data, wmask)
|
||||
}
|
||||
}
|
||||
is (UInt("b101")) {
|
||||
if (prefetches) {
|
||||
printf("[acq %d]: GetPrefetch(addr_block = %x)\n", idx, addr_block)
|
||||
} else {
|
||||
printf("[acq %d]: Get(addr_block = %x, addr_beat = %x, addr_byte = %x, op_size = %x)\n",
|
||||
idx, addr_block, addr_beat, get_addr_byte, get_operand_size)
|
||||
}
|
||||
}
|
||||
is (UInt("b110")) {
|
||||
if (prefetches) {
|
||||
printf("[acq %d]: PutPrefetch(addr_block = %x)\n", idx, addr_block)
|
||||
} else {
|
||||
printf("[acq %d]: Put(addr_block = %x, addr_beat = %x, data = %x, wmask = %x)\n",
|
||||
idx, addr_block, addr_beat, data, wmask)
|
||||
}
|
||||
}
|
||||
is (UInt("b111")) {
|
||||
printf("[acq %d]: Get(addr_block = %x, addr_beat = %x, addr_byte = %x, op_size = %x)\n",
|
||||
idx, addr_block, addr_beat, get_addr_byte, get_operand_size)
|
||||
}
|
||||
}
|
||||
}
|
||||
idx := idx + UInt(1)
|
||||
}
|
||||
}
|
||||
|
||||
/** Drives a shared stream of Acquires at one memory target and collects the
  * resulting Grants back into request order, so that a downstream sink can
  * compare responses across targets beat-for-beat.
  *
  * @param target base physical address of the region this client exercises;
  *               converted to a block offset and added to every addr_block
  */
class ComparatorClient(val target: Long)(implicit val p: Parameters) extends Module
  with HasComparatorParameters
  with HasTileLinkParameters
{
  val io = new Bundle {
    val in = Decoupled(new Acquire).flip        // broadcast source of test Acquires
    val tl = new ClientUncachedTileLinkIO()     // port to the memory target under test
    val out = Decoupled(new Grant)              // Grants, restored to issue order
    val finished = Bool(OUTPUT)
    val timeout = Bool(OUTPUT)
  }

  val xacts = tlMaxClientXacts
  // Block offset of this client's target region (target is a byte address).
  val offset = (UInt(target) >> UInt(tlBeatAddrBits+tlByteAddrBits))

  // Track the status of inflight requests, indexed by client_xact_id:
  //   issued(i) - request i sent, grant not yet consumed
  //   ready(i)  - final grant beat for i has been captured in result(i)
  val issued = RegInit(Vec.fill(xacts) {Bool(false)})
  val ready = RegInit(Vec.fill(xacts) {Bool(false)})
  val result = Reg(Vec(xacts, new Grant))

  val buffer = Queue(io.in, xacts)
  // Records the order xact ids were issued in, so results return in-order.
  val queue = Module(new Queue(io.tl.acquire.bits.client_xact_id, xacts))

  val isMultiOut = buffer.bits.hasMultibeatData()
  val isMultiIn = io.tl.grant.bits.hasMultibeatData()

  // Beat counter for multibeat (PutBlock-style) requests.
  val beatOut = RegInit(UInt(0, width = tlBeatAddrBits))
  val lastBeat = UInt(tlDataBeats-1)
  val isFirstBeatOut= Mux(isMultiOut, beatOut === UInt(0), Bool(true))
  val isLastBeatOut = Mux(isMultiOut, beatOut === lastBeat, Bool(true))
  val isLastBeatIn  = Mux(isMultiIn,  io.tl.grant.bits.addr_beat === lastBeat, Bool(true))

  // Remove this once HoldUnless is in chisel3
  // Passes `in` through while `enable` is high, else holds the last enabled value.
  def holdUnless[T <: Data](in : T, enable: Bool): T = Mux(!enable, RegEnable(in, enable), in)

  // Potentially issue a request, using a free xact id
  // NOTE: we may retract valid and change xact_id on a !ready (allowed by spec)
  val allow_acq = NoiseMaker(1)(0) && issued.map(!_).reduce(_ || _)
  // Hold the chosen id steady across the beats of a multibeat request.
  val xact_id = holdUnless(PriorityEncoder(issued.map(!_)), isFirstBeatOut)

  buffer.ready := allow_acq && io.tl.acquire.ready && isLastBeatOut
  io.tl.acquire.valid := allow_acq && buffer.valid
  io.tl.acquire.bits := buffer.bits
  // Last-connect overrides: retarget the block address and stamp our xact id.
  io.tl.acquire.bits.addr_block := buffer.bits.addr_block + offset
  io.tl.acquire.bits.client_xact_id := xact_id
  when (isMultiOut) {
    val dataOut = (buffer.bits.data << beatOut) + buffer.bits.data // mix the data up a bit
    io.tl.acquire.bits.addr_beat := beatOut
    io.tl.acquire.bits.data := dataOut
  }

  when (io.tl.acquire.fire()) {
    // Only mark issued once the final beat has gone out.
    issued(xact_id) := isLastBeatOut
    when (isMultiOut) { beatOut := beatOut + UInt(1) }
  }

  // Remember the xact ID so we can return results in-order
  queue.io.enq.valid := io.tl.acquire.fire() && isLastBeatOut
  queue.io.enq.bits := xact_id
  assert (queue.io.enq.ready || !queue.io.enq.valid) // should be big enough

  // Capture the results from the manager
  io.tl.grant.ready := NoiseMaker(1)(0)  // randomized backpressure
  when (io.tl.grant.fire()) {
    val id = io.tl.grant.bits.client_xact_id
    assert (!ready(id)) // got same xact_id twice?
    ready(id) := isLastBeatIn
    result(id) := io.tl.grant.bits  // NOTE: only the last beat is retained
  }

  // Bad xact_id returned if ready but not issued!
  assert ((ready zip issued) map {case (r,i) => i || !r} reduce (_ && _))

  // When we have the next grant result, send it to the sink
  val next_id = queue.io.deq.bits
  queue.io.deq.ready := io.out.ready && ready(next_id) // TODO: only compares last getBlock
  io.out.valid := queue.io.deq.valid && ready(next_id)
  io.out.bits := result(queue.io.deq.bits)

  when (io.out.fire()) {
    // Free the xact id for reuse.
    ready(next_id) := Bool(false)
    issued(next_id) := Bool(false)
  }

  io.finished := !buffer.valid && !issued.reduce(_ || _)

  val (idx, acq_done) = Counter(
    io.tl.acquire.fire() && io.tl.acquire.bits.last(), nOperations)
  debug(idx)

  // Per-transaction watchdog: started on the first acquire beat, stopped on
  // the first grant beat of the matching xact id.
  val timer = Module(new Timer(8192, xacts))
  timer.io.start.valid := io.tl.acquire.fire() && io.tl.acquire.bits.first()
  timer.io.start.bits := xact_id
  timer.io.stop.valid := io.tl.grant.fire() && io.tl.grant.bits.first()
  timer.io.stop.bits := io.tl.grant.bits.client_xact_id
  assert(!timer.io.timeout.valid, "Comparator TL client timed out")
  io.timeout := timer.io.timeout.valid
}
|
||||
|
||||
/** Receives the in-order Grant streams from every target's client and checks,
  * one response at a time, that all targets agree. Target 0's response is the
  * reference (`base`); every other target is compared against it.
  */
class ComparatorSink(implicit val p: Parameters) extends Module
  with HasComparatorParameters
  with HasTileLinkParameters
  with HasGroundTestConstants
{
  val io = new Bundle {
    val in = Vec(nTargets, Decoupled(new Grant)).flip
    val finished = Bool(OUTPUT)
    val error = Valid(UInt(width = errorCodeBits))  // index of first failed check
  }

  // could use a smaller Queue here, but would couple targets flow controls together
  val queues = io.in.map(Queue(_, nOperations))

  io.finished := queues.map(!_.valid).reduce(_ && _)
  // Dequeue all targets in lockstep so responses stay aligned for comparison.
  val all_valid = queues.map(_.valid).reduce(_ && _)
  queues.foreach(_.ready := all_valid)

  val base = queues(0).bits                         // reference response (target 0)
  val idx = Reg(init = UInt(0, log2Up(nOperations)))  // response counter, for logging

  // Compare one target's Grant against the reference; asserts on mismatch and
  // reports the failing condition's index on io.error.
  def check(g: Grant) = {
    when (g.hasData() && base.data =/= g.data) {
      printf("%d: %x =/= %x, g_type = %x\n", idx, base.data, g.data, g.g_type)
    }

    val assert_conds = Seq(
      g.is_builtin_type,
      base.g_type === g.g_type,
      base.addr_beat === g.addr_beat || !g.hasData(),
      base.data === g.data || !g.hasData())

    assert (g.is_builtin_type, "grant not builtin")
    assert (base.g_type === g.g_type, "g_type mismatch")
    assert (base.addr_beat === g.addr_beat || !g.hasData(), "addr_beat mismatch")
    assert (base.data === g.data || !g.hasData(), "data mismatch")

    assert_conds.zipWithIndex.foreach { case (cond, i) =>
      when (!cond) {
        io.error.valid := Bool(true)
        io.error.bits := UInt(i)
      }
    }
  }
  when (all_valid) {
    when (base.hasData()) {
      printf("[gnt %d]: g_type = %x, addr_beat = %x, data = %x\n",
        idx, base.g_type, base.addr_beat, base.data)
    } .otherwise {
      printf("[gnt %d]: g_type = %x\n", idx, base.g_type)
    }
    // Target 0 is the reference, so only targets 1..n-1 need checking.
    queues.drop(1).map(_.bits).foreach(check)
    idx := idx + UInt(1)
  }
}
|
||||
|
||||
/** Top level of the comparator ground test: broadcasts one Acquire stream from
  * a single source to a client per memory target, then checks in the sink that
  * all targets produced identical Grants.
  */
class ComparatorCore(implicit p: Parameters) extends GroundTest()(p)
    with HasComparatorParameters
    with HasTileLinkParameters {

  require (io.mem.size == nTargets)

  val source = Module(new ComparatorSource)
  val sink = Module(new ComparatorSink)
  // Fan the single source stream out to one client per target.
  val broadcast = Broadcaster(source.io.out, nTargets)
  val clients = targets.zipWithIndex.map { case (target, index) =>
    val client = Module(new ComparatorClient(target))
    client.io.in <> broadcast(index)
    io.mem(index) <> client.io.tl
    sink.io.in(index) <> client.io.out
    client
  }
  val client_timeouts = clients.map(_.io.timeout)

  io.status.finished := source.io.finished && sink.io.finished && clients.map(_.io.finished).reduce(_ && _)
  io.status.timeout.valid := client_timeouts.reduce(_ || _)
  // Report the index of the first client that timed out (0 when none match).
  io.status.timeout.bits := MuxCase(UInt(0),
    client_timeouts.zipWithIndex.map {
      case (timeout, i) => (timeout -> UInt(i))
    })
  io.status.error := sink.io.error
}
|
||||
212
src/main/scala/groundtest/Generator.scala
Normal file
212
src/main/scala/groundtest/Generator.scala
Normal file
@@ -0,0 +1,212 @@
|
||||
package groundtest
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.devices.NTiles
|
||||
import uncore.constants._
|
||||
import junctions._
|
||||
import rocket._
|
||||
import scala.util.Random
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
/** Configuration for the traffic generators.
  *
  * @param maxRequests  number of requests each generator issues per phase
  * @param startAddress base byte address of the memory region to exercise
  */
case class GeneratorParameters(
  maxRequests: Int,
  startAddress: BigInt)
case object GeneratorKey extends Field[GeneratorParameters]
|
||||
|
||||
/** Derived parameters shared by all traffic generators
  * (uncached TileLink, HellaCache, and NASTI variants). */
trait HasGeneratorParameters extends HasGroundTestParameters {
  implicit val p: Parameters

  val genParams = p(GeneratorKey)
  // Total number of generators across all tiles (cached + uncached per tile);
  // used to give each generator a disjoint address/data tag.
  val nGens = p(GroundTestKey).map(
    cs => cs.uncached + cs.cached).reduce(_ + _)
  val genTimeout = 8192           // cycles before a pending request is declared hung
  val maxRequests = genParams.maxRequests
  val startAddress = genParams.startAddress

  // Generators operate on 32-bit words.
  val genWordBits = 32
  val genWordBytes = genWordBits / 8
  val wordOffset = log2Ceil(genWordBytes)         // byte-offset bits within a word
  val wordSize = UInt(log2Ceil(genWordBytes))     // log2(bytes), as an operand-size field

  // The exercised region must be word-aligned.
  require(startAddress % BigInt(genWordBytes) == 0)
}
|
||||
|
||||
/** Traffic generator on the uncached TileLink port: writes `maxRequests`
  * distinct words (Put phase), then reads them back (Get phase) and checks
  * each value. `id` disambiguates this generator's addresses and data tags
  * from other generators'.
  */
class UncachedTileLinkGenerator(id: Int)
    (implicit p: Parameters) extends TLModule()(p) with HasGeneratorParameters {

  private val tlBlockOffset = tlBeatAddrBits + tlByteAddrBits

  val io = new Bundle {
    val mem = new ClientUncachedTileLinkIO
    val status = new GroundTestStatus
  }

  // s_put: write phase; s_get: read-back/check phase.
  val (s_start :: s_put :: s_get :: s_finished :: Nil) = Enum(Bits(), 4)
  val state = Reg(init = s_start)

  val (req_cnt, req_wrap) = Counter(io.mem.grant.fire(), maxRequests)

  // One request in flight at a time: send, wait for grant, send the next.
  val sending = Reg(init = Bool(false))

  when (state === s_start) {
    sending := Bool(true)
    state := s_put
  }

  when (io.mem.acquire.fire()) { sending := Bool(false) }
  when (io.mem.grant.fire()) { sending := Bool(true) }
  // Phase transition after all requests of the current phase are granted.
  when (req_wrap) { state := Mux(state === s_put, s_get, s_finished) }

  val timeout = Timer(genTimeout, io.mem.acquire.fire(), io.mem.grant.fire())
  assert(!timeout, s"Uncached generator ${id} timed out waiting for grant")

  io.status.finished := (state === s_finished)
  io.status.timeout.valid := timeout
  io.status.timeout.bits := UInt(id)

  // Low-order address bits: generator id followed by the in-word byte offset,
  // keeping each generator's word addresses disjoint. (The id field is empty
  // when there is only one generator.)
  val part_of_full_addr =
    if (log2Ceil(nGens) > 0) {
      Cat(UInt(id, log2Ceil(nGens)),
          UInt(0, wordOffset))
    } else {
      UInt(0, wordOffset)
    }
  val full_addr = UInt(startAddress) + Cat(req_cnt, part_of_full_addr)

  val addr_block = full_addr >> UInt(tlBlockOffset)
  val addr_beat = full_addr(tlBlockOffset - 1, tlByteAddrBits)
  val addr_byte = full_addr(tlByteAddrBits - 1, 0)

  // Data word is unique per (generator, request) so mismatches are detectable.
  val data_prefix = Cat(UInt(id, log2Up(nGens)), req_cnt)
  val word_data = Wire(UInt(width = genWordBits))
  word_data := Cat(data_prefix, part_of_full_addr)
  val beat_data = Fill(tlDataBits / genWordBits, word_data)
  // Write mask selecting only the addressed word within the beat.
  val wshift = Cat(beatOffset(full_addr), UInt(0, wordOffset))
  val wmask = Fill(genWordBits / 8, Bits(1, 1)) << wshift

  val put_acquire = Put(
    client_xact_id = UInt(0),
    addr_block = addr_block,
    addr_beat = addr_beat,
    data = beat_data,
    wmask = Some(wmask),
    alloc = Bool(false))

  val get_acquire = Get(
    client_xact_id = UInt(0),
    addr_block = addr_block,
    addr_beat = addr_beat,
    addr_byte = addr_byte,
    operand_size = wordSize,
    alloc = Bool(false))

  io.mem.acquire.valid := sending && !io.status.finished
  io.mem.acquire.bits := Mux(state === s_put, put_acquire, get_acquire)
  io.mem.grant.ready := !sending && !io.status.finished

  // Extract the addressed word from a returned beat.
  def wordFromBeat(addr: UInt, dat: UInt) = {
    val shift = Cat(beatOffset(addr), UInt(0, wordOffset + 3))
    (dat >> shift)(genWordBits - 1, 0)
  }

  val data_mismatch = io.mem.grant.fire() && state === s_get &&
    wordFromBeat(full_addr, io.mem.grant.bits.data) =/= word_data

  io.status.error.valid := data_mismatch
  io.status.error.bits := UInt(id)

  assert(!data_mismatch,
    s"Get received incorrect data in uncached generator ${id}")

  // Word index of `addr` within its beat.
  def beatOffset(addr: UInt) = // TODO zero-width
    if (tlByteAddrBits > wordOffset) addr(tlByteAddrBits - 1, wordOffset)
    else UInt(0)
}
|
||||
|
||||
/** Traffic generator on the L1 HellaCache port: writes `maxRequests` words,
  * then reads them back and checks each value. Mirrors
  * UncachedTileLinkGenerator but exercises the cached path. */
class HellaCacheGenerator(id: Int)
    (implicit p: Parameters) extends L1HellaCacheModule()(p) with HasGeneratorParameters {
  val io = new Bundle {
    val mem = new HellaCacheIO
    val status = new GroundTestStatus
  }

  // Watchdog from request issue to response.
  val timeout = Timer(genTimeout, io.mem.req.fire(), io.mem.resp.valid)
  assert(!timeout, s"Cached generator ${id} timed out waiting for response")
  io.status.timeout.valid := timeout
  io.status.timeout.bits := UInt(id)

  // s_write: store phase; s_read: load/check phase.
  val (s_start :: s_write :: s_read :: s_finished :: Nil) = Enum(Bits(), 4)
  val state = Reg(init = s_start)
  val sending = Reg(init = Bool(false))

  val (req_cnt, req_wrap) = Counter(io.mem.resp.valid, maxRequests)

  // Low-order address bits: generator id then in-word byte offset, keeping
  // each generator's addresses disjoint (id field empty for a single generator).
  val part_of_req_addr =
    if (log2Ceil(nGens) > 0) {
      Cat(UInt(id, log2Ceil(nGens)),
          UInt(0, wordOffset))
    } else {
      UInt(0, wordOffset)
    }
  val req_addr = UInt(startAddress) + Cat(req_cnt, part_of_req_addr)
  // Data value is unique per (generator, request).
  val req_data = Cat(UInt(id, log2Up(nGens)), req_cnt, part_of_req_addr)

  io.mem.req.valid := sending && !io.status.finished
  io.mem.req.bits.addr := req_addr
  io.mem.req.bits.data := req_data
  io.mem.req.bits.typ := wordSize
  io.mem.req.bits.cmd := Mux(state === s_write, M_XWR, M_XRD)
  io.mem.req.bits.tag := UInt(0)

  when (state === s_start) { sending := Bool(true); state := s_write }

  // One request in flight at a time.
  when (io.mem.req.fire()) { sending := Bool(false) }
  when (io.mem.resp.valid) { sending := Bool(true) }

  // Phase transition after all responses of the current phase arrived.
  when (req_wrap) { state := Mux(state === s_write, s_read, s_finished) }

  io.status.finished := (state === s_finished)

  // Compare only the low genWordBits of response vs. expected data.
  def data_match(recv: Bits, expected: Bits): Bool = {
    val recv_resized = Wire(Bits(width = genWordBits))
    val exp_resized = Wire(Bits(width = genWordBits))

    recv_resized := recv
    exp_resized := expected
    recv_resized === exp_resized
  }

  val data_mismatch = io.mem.resp.valid && io.mem.resp.bits.has_data &&
    !data_match(io.mem.resp.bits.data, req_data)

  io.status.error.valid := data_mismatch
  io.status.error.bits := UInt(id)

  assert(!data_mismatch,
    s"Received incorrect data in cached generator ${id}")
}
|
||||
|
||||
/** Ground test tile that instantiates this tile's share of cached and
  * uncached traffic generators, assigning each a globally unique id. */
class GeneratorTest(implicit p: Parameters)
  extends GroundTest()(p) with HasGeneratorParameters {

  // Number of generators on tiles before this one; our ids start after them.
  val idStart = p(GroundTestKey).take(tileId)
    .map(settings => settings.cached + settings.uncached)
    .foldLeft(0)(_ + _)

  val cached = List.tabulate(nCached) { i =>
    val realId = idStart + i
    Module(new HellaCacheGenerator(realId))
  }

  // Uncached generator ids follow this tile's cached ones.
  val uncached = List.tabulate(nUncached) { i =>
    val realId = idStart + nCached + i
    Module(new UncachedTileLinkGenerator(realId))
  }

  io.cache <> cached.map(_.io.mem)
  io.mem <> uncached.map(_.io.mem)

  // Merge all generators' status (finished/timeout/error) into one report.
  val gen_debug = cached.map(_.io.status) ++ uncached.map(_.io.status)
  io.status := DebugCombiner(gen_debug)
}
|
||||
121
src/main/scala/groundtest/NastiTest.scala
Normal file
121
src/main/scala/groundtest/NastiTest.scala
Normal file
@@ -0,0 +1,121 @@
|
||||
package groundtest
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.converters._
|
||||
import junctions._
|
||||
import cde.Parameters
|
||||
|
||||
/** Traffic generator that speaks NASTI (AXI4-style) directly: writes
  * `maxRequests` words, then reads them back and checks each against a
  * computed reference pattern. One outstanding write at a time
  * (address beat, then data beat); reads are pipelined by the counters. */
class NastiGenerator(id: Int)(implicit val p: Parameters) extends Module
    with HasNastiParameters
    with HasMIFParameters
    with HasAddrMapParameters
    with HasGeneratorParameters {

  val io = new Bundle {
    val status = new GroundTestStatus
    val mem = new NastiIO
  }

  val mifDataBytes = mifDataBits / 8

  val (s_start :: s_write_addr :: s_write_data ::
       s_read :: s_wait :: s_finish :: Nil) = Enum(Bits(), 6)
  val state = Reg(init = s_start)

  // Expected data pattern for word index `idx`.
  def ref_data(idx: UInt) = UInt(0x35abffcd, genWordBits) + (idx << UInt(3))

  // Low-order address bits: generator id then in-word byte offset (id field
  // empty when there is a single generator).
  val part_of_addr =
    if (log2Ceil(nGens) > 0) {
      Cat(UInt(id, log2Ceil(nGens)),
          UInt(0, wordOffset))
    } else {
      UInt(0, wordOffset)
    }

  val (write_idx, write_done) = Counter(io.mem.w.fire(), maxRequests)
  val write_addr = UInt(startAddress) + Cat(write_idx, part_of_addr)
  // Replicate the word across the full data bus; the strobe selects the word.
  val write_data = Fill(mifDataBits / genWordBits, ref_data(write_idx))
  val write_align = write_addr(log2Up(mifDataBytes) - 1, 0)
  val write_mask = UInt((1 << genWordBytes) - 1, nastiWStrobeBits) << write_align

  val (read_idx, read_done) = Counter(io.mem.ar.fire(), maxRequests)
  val read_addr = UInt(startAddress) + Cat(read_idx, part_of_addr)

  // Single-beat (len = 0) word-sized write bursts.
  io.mem.aw.valid := (state === s_write_addr)
  io.mem.aw.bits := NastiWriteAddressChannel(
    id = write_idx(nastiXIdBits - 1, 0),
    addr = write_addr,
    len = UInt(0),
    size = UInt(log2Ceil(genWordBytes)))

  io.mem.w.valid := (state === s_write_data)
  io.mem.w.bits := NastiWriteDataChannel(
    data = write_data,
    strb = Some(write_mask),
    last = Bool(true))

  // Single-beat word-sized reads.
  io.mem.ar.valid := (state === s_read)
  io.mem.ar.bits := NastiReadAddressChannel(
    id = UInt(0),
    addr = read_addr,
    len = UInt(0),
    size = UInt(log2Ceil(genWordBytes)))

  // Always ready for read data and write responses.
  io.mem.r.ready := Bool(true)
  io.mem.b.ready := Bool(true)

  io.status.finished := (state === s_finish)

  // Check each read response against the reference pattern, extracting the
  // addressed word from the returned bus beat.
  val (read_resp_idx, read_resp_done) = Counter(io.mem.r.fire(), maxRequests)
  val read_resp_addr = UInt(startAddress) + Cat(read_resp_idx, part_of_addr)
  val read_offset = read_resp_addr(log2Up(nastiXDataBits / 8) - 1, 0)
  val read_shift = Cat(read_offset, UInt(0, 3))  // byte offset -> bit offset
  val read_data = (io.mem.r.bits.data >> read_shift)(genWordBits - 1, 0)

  val data_mismatch = io.mem.r.valid && read_data =/= ref_data(read_resp_idx)
  assert(!data_mismatch, "NASTI Test: results do not match")
  io.status.error.valid := data_mismatch
  io.status.error.bits := UInt(1)

  // State transitions: alternate aw/w beats until all writes done, then issue
  // all reads, then wait for the final read response.
  when (state === s_start) { state := s_write_addr }
  when (io.mem.aw.fire()) { state := s_write_data }
  when (io.mem.w.fire()) { state := s_write_addr }
  when (write_done) { state := s_read }
  when (read_done) { state := s_wait }
  when (read_resp_done) { state := s_finish }

  // Watchdogs on the read and write channels (keyed by transaction id).
  val r_timer = Module(new Timer(1000, 2))
  r_timer.io.start.valid := io.mem.ar.fire()
  r_timer.io.start.bits := io.mem.ar.bits.id
  r_timer.io.stop.valid := io.mem.r.fire() && io.mem.r.bits.last
  r_timer.io.stop.bits := io.mem.r.bits.id
  assert(!r_timer.io.timeout.valid, "NASTI Read timed out")

  val w_timer = Module(new Timer(1000, 2))
  w_timer.io.start.valid := io.mem.aw.fire()
  w_timer.io.start.bits := io.mem.aw.bits.id
  w_timer.io.stop.valid := io.mem.b.fire()
  w_timer.io.stop.bits := io.mem.b.bits.id
  assert(!w_timer.io.timeout.valid, "NASTI Write timed out")

  io.status.timeout.valid := r_timer.io.timeout.valid || w_timer.io.timeout.valid
  // Code 1 = read timeout, 2 = write timeout.
  io.status.timeout.bits := Mux(r_timer.io.timeout.valid, UInt(1), UInt(2))
}
|
||||
|
||||
/** Ground test that exercises the NASTI-to-TileLink converter by driving a
  * NastiGenerator through TileLinkIONastiIOConverter onto the tile's single
  * uncached TileLink port. */
class NastiConverterTest(implicit p: Parameters) extends GroundTest()(p)
    with HasNastiParameters {
  // This test needs exactly one uncached port and no cached ports.
  require(tileSettings.uncached == 1 && tileSettings.cached == 0)

  // Globally unique generator id (count of generators on preceding tiles).
  val genId = p(GroundTestKey).take(tileId)
    .map(settings => settings.cached + settings.uncached)
    .foldLeft(0)(_ + _)

  val test = Module(new NastiGenerator(genId))
  // Converter is parameterized for the outermost TileLink layer.
  val converter = Module(new TileLinkIONastiIOConverter()(
    p.alterPartial { case TLId => "Outermost" }))

  converter.io.nasti <> test.io.mem
  TileLinkWidthAdapter(io.mem.head, converter.io.tl)
  io.status := test.io.status
}
|
||||
776
src/main/scala/groundtest/Regression.scala
Normal file
776
src/main/scala/groundtest/Regression.scala
Normal file
@@ -0,0 +1,776 @@
|
||||
package groundtest
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import uncore.agents._
|
||||
import uncore.util._
|
||||
import junctions.{ParameterizedBundle, HasAddrMapParameters, Timer}
|
||||
import rocket.HellaCacheIO
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
/** Interface of a single directed regression test. */
class RegressionIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
  val start = Bool(INPUT)                   // pulse to begin the test
  val cache = new HellaCacheIO              // port into the L1 data cache
  val mem = new ClientUncachedTileLinkIO    // direct uncached TileLink port
  val finished = Bool(OUTPUT)
  val errored = Bool(OUTPUT)
}
|
||||
|
||||
/** Base class for directed regression tests. Provides the memory-region
  * constants and helpers to tie off whichever port a test does not use. */
abstract class Regression(implicit val p: Parameters)
  extends Module with HasTileLinkParameters with HasAddrMapParameters {
  val memStart = addrMap("mem").start
  // First cache-block index of main memory; tests address blocks relative to it.
  val memStartBlock = memStart >> p(CacheBlockOffsetBits)
  val io = new RegressionIO

  /** Tie off the cache port with benign defaults for tests that only use
    * the TileLink port. */
  def disableCache() {
    io.cache.req.valid := Bool(false)
    io.cache.req.bits.addr := UInt(memStart)
    io.cache.req.bits.typ := UInt(0)
    io.cache.req.bits.cmd := M_XRD
    io.cache.req.bits.tag := UInt(0)
    io.cache.req.bits.data := Bits(0)
    io.cache.req.bits.phys := Bool(true)
    io.cache.invalidate_lr := Bool(false)
  }

  /** Tie off the TileLink port for tests that only use the cache port. */
  def disableMem() {
    io.mem.acquire.valid := Bool(false)
    io.mem.grant.ready := Bool(false)
  }
}
|
||||
|
||||
/**
 * This was a bug in which the TileLinkIONarrower logic screwed up
 * when a PutBlock request and a narrow Get request are sent to it at the
 * same time. Repeating this sequence enough times will cause a queue to
 * get filled up and deadlock the system.
 */
class IOGetAfterPutBlockRegression(implicit p: Parameters) extends Regression()(p) {
  val nRuns = 7
  val run = Reg(init = UInt(0, log2Up(nRuns + 1)))

  val (put_beat, put_done) = Counter(
    io.mem.acquire.fire() && io.mem.acquire.bits.hasData(), tlDataBeats)

  // Per-run handshake state: both requests must be acknowledged before the
  // next run starts.
  val started = Reg(init = Bool(false))
  val put_sent = Reg(init = Bool(false))
  val get_sent = Reg(init = Bool(false))
  val put_acked = Reg(init = Bool(false))
  val get_acked = Reg(init = Bool(false))
  val both_acked = put_acked && get_acked

  when (!started && io.start) { started := Bool(true) }

  // Wide PutBlock on the TileLink port...
  io.mem.acquire.valid := !put_sent && started
  io.mem.acquire.bits := PutBlock(
    client_xact_id = UInt(0),
    addr_block = UInt(memStartBlock),
    addr_beat = put_beat,
    data = UInt(0))
  io.mem.grant.ready := Bool(true)

  // ...concurrent with a narrow Get (32-bit bootrom read) via the cache port.
  io.cache.req.valid := !get_sent && started
  io.cache.req.bits.addr := UInt(addrMap("io:int:bootrom").start)
  io.cache.req.bits.typ := UInt(log2Ceil(32 / 8))
  io.cache.req.bits.cmd := M_XRD
  io.cache.req.bits.tag := UInt(0)
  io.cache.invalidate_lr := Bool(false)

  when (put_done) { put_sent := Bool(true) }
  when (io.cache.req.fire()) { get_sent := Bool(true) }
  when (io.mem.grant.fire()) { put_acked := Bool(true) }
  when (io.cache.resp.valid) { get_acked := Bool(true) }

  when (both_acked) {
    // Re-arm the request flags for every run but the last.
    when (run < UInt(nRuns - 1)) {
      put_sent := Bool(false)
      get_sent := Bool(false)
    }
    put_acked := Bool(false)
    get_acked := Bool(false)

    run := run + UInt(1)
  }

  io.finished := (run === UInt(nRuns))
}
|
||||
|
||||
/* This was a bug with merging two PutBlocks to the same address in the L2.
 * The transactor would start accepting beats of the second transaction but
 * acknowledge both of them when the first one finished.
 * This caused the state to go funky since the next time around it would
 * start the put in the middle */
class PutBlockMergeRegression(implicit p: Parameters)
    extends Regression()(p) with HasTileLinkParameters {
  val s_idle :: s_put :: s_wait :: s_done :: Nil = Enum(Bits(), 4)
  val state = Reg(init = s_idle)

  disableCache()

  val l2params = p.alterPartial({ case CacheName => "L2Bank" })
  val nSets = l2params(NSets)
  // Two PutBlocks to the same block (index 0), then one to a set-conflicting
  // block (same index, different tag) to force the merge path.
  val addr_blocks = Vec(Seq(0, 0, nSets).map(num => UInt(num + memStartBlock)))
  val nSteps = addr_blocks.size
  val (acq_beat, acq_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
  val (send_cnt, send_done) = Counter(acq_done, nSteps)
  val (ack_cnt, ack_done) = Counter(io.mem.grant.fire(), nSteps)

  io.mem.acquire.valid := (state === s_put)
  io.mem.acquire.bits := PutBlock(
    client_xact_id = send_cnt,
    addr_block = addr_blocks(send_cnt),
    addr_beat = acq_beat,
    data = Cat(send_cnt, acq_beat))  // data tagged with (request, beat)
  io.mem.grant.ready := Bool(true)

  when (state === s_idle && io.start) { state := s_put }
  when (send_done) { state := s_wait }
  when (ack_done) { state := s_done }

  io.finished := (state === s_done)
}
|
||||
|
||||
/* Make sure the L2 does "the right thing" when a put is sent no-alloc but
 * the block is already in cache. It should just treat the request as a
 * regular allocating put */
class NoAllocPutHitRegression(implicit p: Parameters) extends Regression()(p) {
  val (s_idle :: s_prefetch :: s_put :: s_get ::
       s_wait :: s_done :: Nil) = Enum(Bits(), 6)
  val state = Reg(init = s_idle)

  val acq = io.mem.acquire.bits
  val gnt = io.mem.grant.bits

  val (put_beat, put_done) = Counter(io.mem.acquire.fire() && acq.hasData(), tlDataBeats)
  // One ack bit per GetBlock beat, plus one each for the prefetch and the put.
  val acked = Reg(init = UInt(0, tlDataBeats + 2))

  val addr_block = UInt(memStartBlock + 2)
  val test_data = UInt(0x3446)

  // 1) Prefetch pulls the block into the cache, 2) no-alloc PutBlock should
  // still hit and update it, 3) GetBlock verifies the written data.
  val prefetch_acq = GetPrefetch(
    client_xact_id = UInt(0),
    addr_block = addr_block)
  val put_acq = PutBlock(
    client_xact_id = UInt(1),
    addr_block = addr_block,
    addr_beat = put_beat,
    data = test_data,
    alloc = Bool(false))
  val get_acq = GetBlock(
    client_xact_id = UInt(2),
    addr_block = addr_block)

  io.mem.acquire.valid := state.isOneOf(s_prefetch, s_get, s_put)
  io.mem.acquire.bits := MuxCase(get_acq, Seq(
    (state === s_prefetch) -> prefetch_acq,
    (state === s_put) -> put_acq))
  io.mem.grant.ready := Bool(true)

  when (state === s_idle && io.start) { state := s_prefetch }
  when (state === s_prefetch && io.mem.acquire.ready) { state := s_put }
  when (put_done) { state := s_get }
  when (state === s_get && io.mem.acquire.ready) { state := s_wait }
  when (state === s_wait && acked.andR) { state := s_done }

  // Record acknowledgements, keyed by client_xact_id.
  when (io.mem.grant.fire()) {
    switch (gnt.client_xact_id) {
      is (UInt(0)) { acked := acked | UInt(1 << tlDataBeats) }
      is (UInt(1)) { acked := acked | UInt(1 << (tlDataBeats + 1)) }
      is (UInt(2)) { acked := acked | UIntToOH(gnt.addr_beat) }
    }
  }

  val data_mismatch = io.mem.grant.fire() && gnt.hasData() && gnt.data =/= test_data
  assert(!data_mismatch, "NoAllocPutHitRegression: data does not match")

  io.finished := (state === s_done)
  io.errored := data_mismatch

  disableCache()
}
|
||||
|
||||
/** Make sure L2 does the right thing when multiple puts are sent for the
  * same block, but only the first one has the alloc bit set. */
class MixedAllocPutRegression(implicit p: Parameters) extends Regression()(p) {
  val (s_idle :: s_pf_send :: s_pf_wait :: s_put_send :: s_put_wait ::
       s_get_send :: s_get_wait :: s_done :: Nil) = Enum(Bits(), 8)
  val state = Reg(init = s_idle)

  /** We have to test two cases: one when the block is already cached
    * and one when the block is not yet cached.
    * We use prefetching to assure the first case. */
  // Puts 0-1 hit the prefetched block (+15), puts 2-3 the uncached block (+16);
  // only put 2 sets the alloc bit.
  val test_data = Vec(
    UInt("h2222222211111111"),
    UInt("h3333333333333333"),
    UInt("h4444444444444444"),
    UInt("h5555555555555555"))
  val test_alloc = Vec(Bool(false), Bool(false), Bool(true), Bool(false))
  val test_block = Vec(
    Seq.fill(2) { UInt(memStartBlock + 15) } ++
    Seq.fill(2) { UInt(memStartBlock + 16) })
  val test_beat = Vec(UInt(0), UInt(2), UInt(1), UInt(2))

  val (put_acq_id, put_acq_done) = Counter(
    state === s_put_send && io.mem.acquire.ready, test_data.size)
  val (put_gnt_cnt, put_gnt_done) = Counter(
    state === s_put_wait && io.mem.grant.valid, test_data.size)

  val (get_acq_id, get_acq_done) = Counter(
    state === s_get_send && io.mem.acquire.ready, test_data.size)
  val (get_gnt_cnt, get_gnt_done) = Counter(
    state === s_get_wait && io.mem.grant.valid, test_data.size)

  val pf_acquire = PutPrefetch(
    client_xact_id = UInt(0),
    addr_block = UInt(memStartBlock + 15))

  val put_acquire = Put(
    client_xact_id = put_acq_id,
    addr_block = test_block(put_acq_id),
    addr_beat = test_beat(put_acq_id),
    data = test_data(put_acq_id),
    alloc = test_alloc(put_acq_id))

  // Read each written beat back; client_xact_id indexes the expected data.
  val get_acquire = Get(
    client_xact_id = get_acq_id,
    addr_block = test_block(get_acq_id),
    addr_beat = test_beat(get_acq_id))

  io.mem.acquire.valid := state.isOneOf(s_pf_send, s_put_send, s_get_send)
  io.mem.acquire.bits := MuxLookup(state, pf_acquire, Seq(
    s_put_send -> put_acquire,
    s_get_send -> get_acquire))
  io.mem.grant.ready := state.isOneOf(s_pf_wait, s_put_wait, s_get_wait)

  // prefetch -> all puts -> all gets -> done
  when (state === s_idle && io.start) { state := s_pf_send }
  when (state === s_pf_send && io.mem.acquire.ready) { state := s_pf_wait }
  when (state === s_pf_wait && io.mem.grant.valid) { state := s_put_send }
  when (put_acq_done) { state := s_put_wait }
  when (put_gnt_done) { state := s_get_send }
  when (get_acq_done) { state := s_get_wait }
  when (get_gnt_done) { state := s_done }

  io.finished := (state === s_done)

  val data_mismatch = state === s_get_wait && io.mem.grant.fire() &&
    io.mem.grant.bits.data =/= test_data(io.mem.grant.bits.client_xact_id)
  assert(!data_mismatch, "MixedAllocPutRegression: data mismatch")
  io.errored := data_mismatch

  disableCache()
}
|
||||
|
||||
/* Make sure each no-alloc put triggers a request to outer memory.
 * Unfortunately, there's no way to verify that this works except by looking
 * at the waveform */
class RepeatedNoAllocPutRegression(implicit p: Parameters) extends Regression()(p) {
  disableCache()

  val nPuts = 2
  val (put_beat, put_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
  val (req_cnt, req_done) = Counter(put_done, nPuts)

  val sending = Reg(init = Bool(false))
  val acked = Reg(init = UInt(0, nPuts))  // one bit per acknowledged put

  when (!sending && io.start) { sending := Bool(true) }
  when (sending && req_done) { sending := Bool(false) }

  // Back-to-back no-alloc PutBlocks to the same block; each should go to
  // outer memory rather than allocating in the L2.
  io.mem.acquire.valid := sending
  io.mem.acquire.bits := PutBlock(
    client_xact_id = req_cnt,
    addr_block = UInt(memStartBlock + 5),
    addr_beat = put_beat,
    data = Cat(req_cnt, UInt(0, 8)),
    alloc = Bool(false))
  io.mem.grant.ready := Bool(true)

  when (io.mem.grant.fire()) {
    acked := acked | UIntToOH(io.mem.grant.bits.client_xact_id)
  }

  // Done once every put has been acknowledged.
  io.finished := acked.andR
}
|
||||
|
||||
/* Make sure write masking works properly by writing a block of data
 * piece by piece */
class WriteMaskedPutBlockRegression(implicit p: Parameters) extends Regression()(p) {
  disableCache()

  val (s_idle :: s_put_send :: s_put_ack :: s_stall ::
       s_get_send :: s_get_ack :: s_done :: Nil) = Enum(Bits(), 7)
  val state = Reg(init = s_idle)
  // State to jump to once the stall counter expires
  val post_stall_state = Reg(init = s_idle)

  val gnt = io.mem.grant.bits
  val acq = io.mem.acquire.bits

  // Two passes: stage 0 writes even beats, stage 1 writes odd beats
  val stage = Reg(init = UInt(0, 1))

  val (put_beat, put_block_done) = Counter(
    io.mem.acquire.fire() && acq.hasData(), tlDataBeats)
  val put_data = UInt(0x30010040, tlDataBits) + (put_beat << UInt(2))

  // Only drive real data (and a full write mask) on beats matching the current
  // stage; the other beats carry zero data with an empty mask
  val put_acq = PutBlock(
    client_xact_id = UInt(0),
    addr_block = UInt(memStartBlock + 7),
    addr_beat = put_beat,
    data = Mux(put_beat(0) === stage, put_data, UInt(0)),
    wmask = Some(Mux(put_beat(0) === stage, Acquire.fullWriteMask, Bits(0))))

  // The read target depends on stage — TODO confirm: the first pass reads a
  // different block (memStartBlock + 6) to perturb the data buffer, the second
  // pass (stage 1) reads back the masked-written block (memStartBlock + 7)
  val get_acq = GetBlock(
    client_xact_id = UInt(0),
    addr_block = UInt(memStartBlock + 6) + stage)

  io.mem.acquire.valid := state.isOneOf(s_put_send, s_get_send)
  io.mem.acquire.bits := Mux(state === s_get_send, get_acq, put_acq)
  io.mem.grant.ready := state.isOneOf(s_put_ack, s_get_ack)

  val (get_cnt, get_done) = Counter(
    io.mem.grant.fire() && gnt.hasData(), tlDataBeats)
  val get_data = UInt(0x30010040, tlDataBits) + (get_cnt << UInt(2))

  // Fixed 16-cycle stall between phases
  val (stall_cnt, stall_done) = Counter(state === s_stall, 16)

  when (state === s_idle && io.start) { state := s_put_send }
  when (put_block_done) { state := s_put_ack }
  when (state === s_put_ack && io.mem.grant.valid) {
    post_stall_state := s_get_send
    state := s_stall
  }
  when (stall_done) { state := post_stall_state }
  when (state === s_get_send && io.mem.acquire.ready) { state := s_get_ack }
  when (get_done) {
    // do a read in-between the two put-blocks to overwrite the data buffer
    when (stage === UInt(0)) {
      stage := stage + UInt(1)
      post_stall_state := s_put_send
      state := s_stall
    } .otherwise { state := s_done }
  }

  io.finished := (state === s_done)

  // Only check data on the second pass (stage 1), once both halves were written
  val data_mismatch = io.mem.grant.fire() && io.mem.grant.bits.hasData() &&
    stage =/= UInt(0) && io.mem.grant.bits.data =/= get_data
  assert(!data_mismatch, "WriteMaskedPutBlockRegression: data does not match")
  io.errored := data_mismatch
}
|
||||
|
||||
/* Make sure a prefetch that hits returns immediately. */
class PrefetchHitRegression(implicit p: Parameters) extends Regression()(p) {
  disableCache()

  val sending = Reg(init = Bool(false))
  val nPrefetches = 2
  val (pf_cnt, pf_done) = Counter(io.mem.acquire.fire(), nPrefetches)
  // One bit per prefetch, set when its grant arrives
  val acked = Reg(init = UInt(0, nPrefetches))

  // Issue one put-prefetch then one get-prefetch to the same block, so the
  // second one hits
  val acq_bits = Vec(
    PutPrefetch(client_xact_id = UInt(0), addr_block = UInt(memStartBlock + 12)),
    GetPrefetch(client_xact_id = UInt(1), addr_block = UInt(memStartBlock + 12)))

  io.mem.acquire.valid := sending
  io.mem.acquire.bits := acq_bits(pf_cnt)
  io.mem.grant.ready := Bool(true)

  when (io.mem.grant.fire()) {
    acked := acked | UIntToOH(io.mem.grant.bits.client_xact_id)
  }

  when (!sending && io.start) { sending := Bool(true) }
  when (sending && pf_done) { sending := Bool(false) }

  // Finished once both prefetches have been acknowledged
  io.finished := acked.andR
  // Completion-only test: no data to check, so it never flags an error
  io.errored := Bool(false)
}
|
||||
|
||||
/* This tests the sort of access pattern that Hwacha uses.
 * Instead of using PutBlock/GetBlock, it uses word-sized puts and gets
 * to the same block.
 * Each request has the same client_xact_id, but there are multiple in flight.
 * The responses therefore must come back in the order they are sent.
 * (Only gets are exercised in this particular regression.) */
class SequentialSameIdGetRegression(implicit p: Parameters) extends Regression()(p) {
  disableCache()

  val sending = Reg(init = Bool(false))
  val finished = Reg(init = Bool(false))

  // One beat-sized Get per beat of the block, all sharing client_xact_id 0
  val (send_cnt, send_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
  val (recv_cnt, recv_done) = Counter(io.mem.grant.fire(), tlDataBeats)

  when (!sending && io.start) { sending := Bool(true) }
  when (send_done) { sending := Bool(false) }
  when (recv_done) { finished := Bool(true) }

  io.mem.acquire.valid := sending
  io.mem.acquire.bits := Get(
    client_xact_id = UInt(0),
    addr_block = UInt(memStartBlock + 9),
    addr_beat = send_cnt)
  io.mem.grant.ready := !finished

  io.finished := finished

  // Since all requests share an id, grants must arrive in issue order
  val beat_mismatch = io.mem.grant.fire() && io.mem.grant.bits.addr_beat =/= recv_cnt
  assert(!beat_mismatch, "SequentialSameIdGetRegression: grant received out of order")
  io.errored := beat_mismatch
}
|
||||
|
||||
/* Test that a writeback will occur by writing nWays + 1 blocks to the same
 * set. This assumes that there is only a single cache bank. If we want to
 * test multibank configurations, we'll have to think of some other way to
 * determine which banks are conflicting */
class WritebackRegression(implicit p: Parameters) extends Regression()(p) {
  disableCache()

  val l2params = p.alterPartial({ case CacheName => "L2Bank" })
  val nSets = l2params(NSets)
  val nWays = l2params(NWays)

  // nWays + 1 blocks that all index the same L2 set, each with distinct data
  val addr_blocks = Vec.tabulate(nWays + 1) { i => UInt(memStartBlock + i * nSets) }
  val data = Vec.tabulate(nWays + 1) { i => UInt((i + 1) * 1423) }

  // Beat counters: puts are acquires carrying data, gets are grants carrying data
  val (put_beat, put_done) = Counter(
    io.mem.acquire.fire() && io.mem.acquire.bits.hasData(), tlDataBeats)
  val (get_beat, get_done) = Counter(
    io.mem.grant.fire() && io.mem.grant.bits.hasData(), tlDataBeats)
  // Transaction counters over the nWays + 1 blocks
  val (put_cnt, _) = Counter(put_done, nWays + 1)
  val (get_cnt, _) = Counter(
    io.mem.acquire.fire() && !io.mem.acquire.bits.hasData(), nWays + 1)
  // Acks: dataless grants (put acks) in the put phase, completed get-blocks
  // in the get phase
  val (ack_cnt, ack_done) = Counter(
    io.mem.grant.fire() && !io.mem.grant.bits.hasData() || get_done, nWays + 1)

  val s_idle :: s_put :: s_get :: s_done :: Nil = Enum(Bits(), 4)
  val state = Reg(init = s_idle)
  val sending = Reg(init = Bool(false))

  io.mem.acquire.valid := sending
  io.mem.acquire.bits := Mux(state === s_put,
    PutBlock(
      client_xact_id = UInt(0),
      addr_block = addr_blocks(put_cnt),
      addr_beat = put_beat,
      data = data(put_cnt)),
    GetBlock(
      client_xact_id = UInt(0),
      addr_block = addr_blocks(get_cnt)))
  io.mem.grant.ready := !sending

  // Ping-pong between sending one transaction and waiting for its response
  when (state === s_idle && io.start) { state := s_put; sending := Bool(true) }
  when (put_done || state === s_get && io.mem.acquire.fire()) {
    sending := Bool(false)
  }
  when (get_done && !ack_done || state === s_put && io.mem.grant.fire()) {
    sending := Bool(true)
  }
  when (ack_done) { state := Mux(state === s_put, s_get, s_done) }

  io.finished := (state === s_done)

  // Read-back data must match what was written to the evicted blocks
  val data_mismatch = io.mem.grant.fire() && io.mem.grant.bits.hasData() &&
    io.mem.grant.bits.data =/= data(ack_cnt)
  assert(!data_mismatch, "WritebackRegression: incorrect data")
  io.errored := data_mismatch
}
|
||||
|
||||
/** Exercises L1 releases: writes nWays + 1 lines that conflict in the same
  * L1 set (forcing an eviction/release), then reads them all back and
  * checks the data. Uses the cached (HellaCache) port only. */
class ReleaseRegression(implicit p: Parameters) extends Regression()(p) {
  disableMem()

  val l1params = p.alterPartial({ case CacheName => "L1D" })
  val nSets = l1params(NSets)
  val nWays = l1params(NWays)
  val blockOffset = l1params(CacheBlockOffsetBits)

  val startBlock = memStartBlock + 10
  // nWays + 1 conflicting blocks with distinct data values
  val addr_blocks = Vec.tabulate(nWays + 1) { i => UInt(startBlock + i * nSets) }
  val data = Vec.tabulate(nWays + 1) { i => UInt((i + 1) * 1522) }
  val (req_idx, req_done) = Counter(io.cache.req.fire(), nWays + 1)
  val (resp_idx, resp_done) = Counter(io.cache.resp.valid, nWays + 1)

  val sending = Reg(init = Bool(false))
  val s_idle :: s_write :: s_read :: s_done :: Nil = Enum(Bits(), 4)
  val state = Reg(init = s_idle)

  io.cache.req.valid := sending && state.isOneOf(s_write, s_read)
  io.cache.req.bits.addr := Cat(addr_blocks(req_idx), UInt(0, blockOffset))
  io.cache.req.bits.typ := UInt(log2Ceil(64 / 8)) // doubleword accesses
  io.cache.req.bits.cmd := Mux(state === s_write, M_XWR, M_XRD)
  io.cache.req.bits.tag := UInt(0)
  io.cache.req.bits.data := data(req_idx)
  io.cache.req.bits.phys := Bool(true)
  io.cache.invalidate_lr := Bool(false)

  when (state === s_idle && io.start) {
    sending := Bool(true)
    state := s_write
  }

  // One request in flight at a time: stop on issue, resume on response
  when (resp_done) { state := Mux(state === s_write, s_read, s_done) }
  when (io.cache.req.fire()) { sending := Bool(false) }
  when (io.cache.resp.valid) { sending := Bool(true) }

  io.finished := (state === s_done)

  val data_mismatch = io.cache.resp.valid && io.cache.resp.bits.has_data &&
    io.cache.resp.bits.data =/= data(resp_idx)
  assert(!data_mismatch, "ReleaseRegression: data mismatch")
  io.errored := data_mismatch
}
|
||||
|
||||
/** Issues a single-beat Put immediately followed by a multi-beat PutBlock
  * (distinct transaction ids) and waits for both acknowledgements.
  * Completion-only test: no data is checked. */
class PutBeforePutBlockRegression(implicit p: Parameters) extends Regression()(p) {
  val (s_idle :: s_put :: s_putblock :: s_wait ::
       s_finished :: Nil) = Enum(Bits(), 5)
  val state = Reg(init = s_idle)

  disableCache()

  val (put_block_beat, put_block_done) = Counter(
    state === s_putblock && io.mem.acquire.ready, tlDataBeats)

  // Single-beat put with a full 8-byte write mask
  val put_acquire = Put(
    client_xact_id = UInt(0),
    addr_block = UInt(memStartBlock),
    addr_beat = UInt(0),
    data = UInt(0),
    wmask = Some(UInt((1 << 8) - 1)))

  val put_block_acquire = PutBlock(
    client_xact_id = UInt(1),
    addr_block = UInt(memStartBlock + 1),
    addr_beat = put_block_beat,
    data = UInt(0))

  // Wait for one grant per transaction (put + put-block)
  // NOTE: the original declared an unused `put_acked` register here; removed.
  val (ack_cnt, all_acked) = Counter(io.mem.grant.fire(), 2)

  io.mem.acquire.valid := state.isOneOf(s_put, s_putblock)
  io.mem.acquire.bits := Mux(state === s_put, put_acquire, put_block_acquire)
  io.mem.grant.ready := (state === s_wait)

  when (state === s_idle && io.start) { state := s_put }
  when (state === s_put && io.mem.acquire.ready) { state := s_putblock }
  when (put_block_done) { state := s_wait }
  when (all_acked) { state := s_finished }

  io.finished := (state === s_finished)
  io.errored := Bool(false)
}
|
||||
|
||||
/**
 * Make sure that multiple gets to the same line and beat are merged
 * correctly, even if it is a cache miss.
 */
class MergedGetRegression(implicit p: Parameters) extends Regression()(p) {
  disableCache()

  val l2params = p.alterPartial({ case CacheName => "L2Bank" })
  val nSets = l2params(NSets)
  val nWays = l2params(NWays)

  val (s_idle :: s_put :: s_get :: s_done :: Nil) = Enum(Bits(), 4)
  val state = Reg(init = s_idle)

  // Write NWays + 1 different conflicting lines to force an eviction of the first line
  val (put_acq_cnt, put_acq_done) = Counter(state === s_put && io.mem.acquire.fire(), nWays + 1)
  val (put_gnt_cnt, put_gnt_done) = Counter(state === s_put && io.mem.grant.fire(), nWays + 1)
  val put_addr = UInt(memStartBlock) + Cat(put_acq_cnt, UInt(0, log2Up(nSets)))

  // Then issue two gets to the (now evicted) first line; they should be merged
  val (get_acq_cnt, get_acq_done) = Counter(state === s_get && io.mem.acquire.fire(), 2)
  val (get_gnt_cnt, get_gnt_done) = Counter(state === s_get && io.mem.grant.fire(), 2)
  val sending = Reg(init = Bool(false))

  when (state === s_idle && io.start) { state := s_put; sending := Bool(true) }
  when (state === s_put) {
    // One put in flight at a time: stop on issue, resume on grant
    when (io.mem.acquire.fire()) { sending := Bool(false) }
    when (io.mem.grant.fire()) { sending := Bool(true) }
    when (put_gnt_done) { state := s_get }
  }
  when (state === s_get) {
    when (get_acq_done) { sending := Bool(false) }
    when (get_gnt_done) { state := s_done }
  }

  io.mem.acquire.valid := sending
  io.mem.acquire.bits := Mux(state === s_put,
    Put(
      client_xact_id = UInt(0),
      addr_block = put_addr,
      addr_beat = UInt(3),
      data = UInt("hdabb9321")),
    Get(
      client_xact_id = get_acq_cnt,
      addr_block = UInt(memStartBlock),
      addr_beat = UInt(3)))
  io.mem.grant.ready := !sending

  // Both gets must return the value written to the first (evicted) line.
  // Fixed: the assert message previously said "RepeatedGetRegression".
  val data_mismatch = io.mem.grant.valid && io.mem.grant.bits.hasData() &&
    io.mem.grant.bits.data =/= UInt("hdabb9321")
  assert(!data_mismatch, "MergedGetRegression: wrong data back")

  io.finished := state === s_done
  io.errored := data_mismatch
}
|
||||
|
||||
/**
 * Make sure that multiple puts to the same line and beat are merged
 * correctly, even if there is a release from the L1
 */
class MergedPutRegression(implicit p: Parameters) extends Regression()(p)
  with HasTileLinkParameters {
  val (s_idle :: s_cache_req :: s_cache_wait ::
       s_put :: s_get :: s_done :: Nil) = Enum(Bits(), 6)
  val state = Reg(init = s_idle)

  // First write through the L1 cache so the line is held there and the later
  // uncached puts can race with its release
  io.cache.req.valid := (state === s_cache_req)
  io.cache.req.bits.cmd := M_XWR
  io.cache.req.bits.typ := UInt(log2Ceil(64 / 8))
  io.cache.req.bits.addr := UInt(memStart)
  io.cache.req.bits.data := UInt(1)
  io.cache.req.bits.tag := UInt(0)

  val sending = Reg(init = Bool(false))
  val delaying = Reg(init = Bool(false))
  val (put_cnt, put_done) = Counter(io.mem.acquire.fire(), tlMaxClientXacts)
  val (delay_cnt, delay_done) = Counter(delaying, 8)
  // One bit per outstanding put, set when its grant returns.
  // NOTE(review): width is fixed at 3 bits, so andR assumes
  // tlMaxClientXacts == 3 here — confirm against the config.
  val put_acked = Reg(UInt(width = 3), init = UInt(0))

  io.mem.acquire.valid := sending && !delaying
  io.mem.acquire.bits := Mux(state === s_put,
    Put(
      client_xact_id = put_cnt,
      addr_block = UInt(memStartBlock),
      addr_beat = UInt(0),
      data = put_cnt + UInt(2)),
    Get(
      client_xact_id = UInt(0),
      addr_block = UInt(memStartBlock),
      addr_beat = UInt(0)))
  io.mem.grant.ready := Bool(true)

  when (state === s_idle && io.start) { state := s_cache_req }
  when (io.cache.req.fire()) { state := s_cache_wait }
  when (io.cache.resp.valid) { state := s_put; sending := Bool(true) }

  // Insert an 8-cycle gap after each acquire
  when (io.mem.acquire.fire()) {
    delaying := Bool(true)
    when (put_done || state === s_get) { sending := Bool(false) }
  }
  when (delay_done) { delaying := Bool(false) }

  when (io.mem.grant.fire()) {
    when (state === s_put) {
      put_acked := put_acked | UIntToOH(io.mem.grant.bits.client_xact_id)
    }
    when (state === s_get) { state := s_done }
  }

  // Once every put is acked, read the beat back
  when (state === s_put && put_acked.andR) {
    state := s_get
    sending := Bool(true)
  }

  // The last put wins, so the final value is 2 + tlMaxClientXacts - 1
  val expected_data = UInt(2 + tlMaxClientXacts - 1)
  val data_mismatch = io.mem.grant.valid && io.mem.grant.bits.hasData() &&
    io.mem.grant.bits.data =/= expected_data

  assert(!data_mismatch, "MergedPutRegression: data mismatch")

  io.finished := (state === s_done)
  io.errored := data_mismatch
}
|
||||
|
||||
/** Collections of regression modules, grouped by which coherence-agent
  * configurations can run them. */
object RegressionTests {
  // Regressions that require an L2 cache agent
  def cacheRegressions(implicit p: Parameters) = Seq(
    Module(new PutBlockMergeRegression),
    Module(new NoAllocPutHitRegression),
    Module(new RepeatedNoAllocPutRegression),
    Module(new WriteMaskedPutBlockRegression),
    Module(new PrefetchHitRegression),
    Module(new SequentialSameIdGetRegression),
    Module(new WritebackRegression),
    Module(new PutBeforePutBlockRegression),
    Module(new MixedAllocPutRegression),
    Module(new ReleaseRegression),
    Module(new MergedGetRegression),
    Module(new MergedPutRegression))
  // Regressions that also work under a broadcast hub (no L2 cache)
  def broadcastRegressions(implicit p: Parameters) = Seq(
    Module(new IOGetAfterPutBlockRegression),
    Module(new WriteMaskedPutBlockRegression),
    Module(new PutBeforePutBlockRegression),
    Module(new ReleaseRegression))
}
|
||||
|
||||
// Config field: factory selecting which regression modules to instantiate
case object GroundTestRegressions extends Field[Parameters => Seq[Regression]]
|
||||
|
||||
/** Top-level ground test that runs each configured regression in sequence,
  * muxing the shared memory and cache ports to whichever one is active. */
class RegressionTest(implicit p: Parameters) extends GroundTest()(p) {
  val regressions = p(GroundTestRegressions)(p)
  // Index of the currently running regression (== regressions.size when done)
  val regress_idx = Reg(init = UInt(0, log2Up(regressions.size + 1)))
  val cur_finished = Wire(init = Bool(false))
  val all_done = (regress_idx === UInt(regressions.size))
  // Pulsed for one cycle to start each regression
  val start = Reg(init = Bool(true))

  // default output values
  io.mem.head.acquire.valid := Bool(false)
  io.mem.head.acquire.bits := GetBlock(
    client_xact_id = UInt(0),
    addr_block = UInt(0))
  io.mem.head.grant.ready := Bool(false)
  io.cache.head.req.valid := Bool(false)
  io.cache.head.req.bits.addr := UInt(0)
  io.cache.head.req.bits.typ := UInt(log2Ceil(64 / 8))
  io.cache.head.req.bits.cmd := M_XRD
  io.cache.head.req.bits.tag := UInt(0)
  io.cache.head.req.bits.phys := Bool(true)
  io.cache.head.req.bits.data := UInt(0)
  io.cache.head.invalidate_lr := Bool(false)

  // Route the shared ports to/from whichever regression is active
  regressions.zipWithIndex.foreach { case (regress, i) =>
    val me = regress_idx === UInt(i)
    regress.io.start := me && start
    regress.io.mem.acquire.ready := io.mem.head.acquire.ready && me
    regress.io.mem.grant.valid := io.mem.head.grant.valid && me
    regress.io.mem.grant.bits := io.mem.head.grant.bits
    regress.io.cache.req.ready := io.cache.head.req.ready && me
    regress.io.cache.resp.valid := io.cache.head.resp.valid && me
    regress.io.cache.resp.bits := io.cache.head.resp.bits

    // The active regression overrides the default port values above
    when (me) {
      io.mem.head.acquire.valid := regress.io.mem.acquire.valid
      io.mem.head.acquire.bits := regress.io.mem.acquire.bits
      io.mem.head.grant.ready := regress.io.mem.grant.ready
      io.cache.head.req.valid := regress.io.cache.req.valid
      io.cache.head.req.bits := regress.io.cache.req.bits
      io.cache.head.invalidate_lr := regress.io.cache.invalidate_lr
      io.status.error.valid := regress.io.errored
      io.status.error.bits := UInt(i)
      cur_finished := regress.io.finished
    }

    when (regress.io.start) {
      printf(s"Starting regression ${regress.getClass.getSimpleName}\n")
    }
  }

  // Advance to the next regression when the current one finishes
  when (cur_finished && !all_done) {
    start := Bool(true)
    regress_idx := regress_idx + UInt(1)
  }
  when (start) { start := Bool(false) }

  // Each regression must finish within 5000 cycles of being started
  val timeout = Timer(5000, start, cur_finished)
  assert(!timeout, "Regression timed out")

  io.status.finished := all_done
  io.status.timeout.valid := timeout
  io.status.timeout.bits := UInt(0)

  // A grant after completion means a transaction was left in flight
  assert(!(all_done && io.mem.head.grant.valid),
    "Getting grant after test completion")

  when (all_done) {
    io.status.error.valid := io.mem.head.grant.valid
    io.status.error.bits := UInt(regressions.size)
  }
}
|
||||
139
src/main/scala/groundtest/Tile.scala
Normal file
139
src/main/scala/groundtest/Tile.scala
Normal file
@@ -0,0 +1,139 @@
|
||||
package groundtest
|
||||
|
||||
import Chisel._
|
||||
import rocket._
|
||||
import uncore.tilelink._
|
||||
import junctions._
|
||||
import scala.util.Random
|
||||
import scala.collection.mutable.ListBuffer
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
// Config field: factory for the ground test a tile should run
case object BuildGroundTest extends Field[Parameters => GroundTest]

// Per-tile port counts and maximum in-flight transactions for a ground test
case class GroundTestTileSettings(
  uncached: Int = 0, cached: Int = 0, ptw: Int = 0, maxXacts: Int = 1)
// Config field: one settings entry per ground-test tile
case object GroundTestKey extends Field[Seq[GroundTestTileSettings]]
// Config field: the index of the current tile into GroundTestKey
case object GroundTestId extends Field[Int]
|
||||
|
||||
// Bit widths of the timeout and error codes reported in GroundTestStatus
trait HasGroundTestConstants {
  val timeoutCodeBits = 4
  val errorCodeBits = 4
}
|
||||
|
||||
/** Convenience accessors for this tile's ground-test configuration. */
trait HasGroundTestParameters extends HasAddrMapParameters {
  implicit val p: Parameters
  val tileId = p(GroundTestId)
  val tileSettings = p(GroundTestKey)(tileId)
  val nUncached = tileSettings.uncached // number of uncached TileLink ports
  val nCached = tileSettings.cached     // number of cached (HellaCache) ports
  val nPTW = tileSettings.ptw           // number of page-table-walker ports
  val memStart = addrMap("mem").start
  // Index of the first cache block of main memory
  val memStartBlock = memStart >> p(CacheBlockOffsetBits)
}
|
||||
|
||||
/** A fake page-table walker that identity-maps each VPN to a PPN with a
  * two-cycle response, for ground tests that need a PTW interface but no
  * real page tables. Arbitrates between n requestors. */
class DummyPTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
  val io = new Bundle {
    val requestors = Vec(n, new TLBPTWIO).flip
  }

  // Round-robin arbitration between requestors; requests are always accepted
  val req_arb = Module(new RRArbiter(new PTWReq, n))
  req_arb.io.in <> io.requestors.map(_.req)
  req_arb.io.out.ready := Bool(true)

  // Identity translation: the PPN is simply the low bits of the VPN
  def vpn_to_ppn(vpn: UInt): UInt = vpn(ppnBits - 1, 0)

  // NOTE(review): QueueChannel appears unused in this module — confirm
  class QueueChannel extends ParameterizedBundle()(p) {
    val ppn = UInt(width = ppnBits)
    val chosen = UInt(width = log2Up(n))
  }

  // Two-stage pipeline: translate in s1, respond in s2
  val s1_ppn = vpn_to_ppn(req_arb.io.out.bits.addr)
  val s2_ppn = RegEnable(s1_ppn, req_arb.io.out.valid)
  val s2_chosen = RegEnable(req_arb.io.chosen, req_arb.io.out.valid)
  val s2_valid = Reg(next = req_arb.io.out.valid)

  // Respond with a permissive PTE: valid, readable, writable, user, dirty
  val s2_resp = Wire(new PTWResp)
  s2_resp.pte.ppn := s2_ppn
  s2_resp.pte.reserved_for_software := UInt(0)
  s2_resp.pte.d := Bool(true)
  s2_resp.pte.a := Bool(false)
  s2_resp.pte.g := Bool(false)
  s2_resp.pte.u := Bool(true)
  s2_resp.pte.r := Bool(true)
  s2_resp.pte.w := Bool(true)
  s2_resp.pte.x := Bool(false)
  s2_resp.pte.v := Bool(true)

  // Broadcast the response; only the chosen requestor sees it as valid
  io.requestors.zipWithIndex.foreach { case (requestor, i) =>
    requestor.resp.valid := s2_valid && s2_chosen === UInt(i)
    requestor.resp.bits := s2_resp
    requestor.status.vm := UInt("b01000")
    requestor.status.prv := UInt(PRV.S)
    requestor.invalidate := Bool(false)
  }
}
|
||||
|
||||
/** Status a ground test reports upward: a done flag plus optional
  * timeout and error codes. */
class GroundTestStatus extends Bundle with HasGroundTestConstants {
  val finished = Bool(OUTPUT)
  val timeout = Valid(UInt(width = timeoutCodeBits))
  val error = Valid(UInt(width = errorCodeBits))
}
|
||||
|
||||
/** The ports available to a ground test — cached, uncached, and PTW —
  * sized according to the tile's GroundTestTileSettings. */
class GroundTestIO(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasGroundTestParameters {
  val cache = Vec(nCached, new HellaCacheIO)
  val mem = Vec(nUncached, new ClientUncachedTileLinkIO)
  val ptw = Vec(nPTW, new TLBPTWIO)
  val status = new GroundTestStatus
}
|
||||
|
||||
/** Base class for all ground tests; concrete tests drive io.status. */
abstract class GroundTest(implicit val p: Parameters) extends Module
  with HasGroundTestParameters {
  val io = new GroundTestIO
}
|
||||
|
||||
/** A tile that runs a ground test instead of a processor core, wiring the
  * test's cached ports through a HellaCache (with arbitration), its PTW
  * ports to a DummyPTW, and its uncached ports straight to TileLink. */
class GroundTestTile(resetSignal: Bool)
                    (implicit val p: Parameters)
                    extends Tile(resetSignal = resetSignal)(p)
                    with HasGroundTestParameters {

  override val io = new TileIO {
    val success = Bool(OUTPUT)
  }

  val test = p(BuildGroundTest)(dcacheParams)

  val ptwPorts = ListBuffer.empty ++= test.io.ptw
  val memPorts = ListBuffer.empty ++= test.io.mem

  if (nCached > 0) {
    // Arbitrate all of the test's cached ports into a single HellaCache
    val dcache_io = HellaCache(p(DCacheKey))(dcacheParams)
    val dcacheArb = Module(new HellaCacheArbiter(nCached)(dcacheParams))

    dcacheArb.io.requestor.zip(test.io.cache).foreach {
      case (requestor, cache) =>
        val dcacheIF = Module(new SimpleHellaCacheIF()(dcacheParams))
        dcacheIF.io.requestor <> cache
        requestor <> dcacheIF.io.cache
    }
    dcache_io.cpu <> dcacheArb.io.mem
    io.cached.head <> dcache_io.mem

    // SimpleHellaCacheIF leaves invalidate_lr dangling, so we wire it to false
    dcache_io.cpu.invalidate_lr := Bool(false)

    // The data cache needs a PTW port too
    ptwPorts += dcache_io.ptw
  }

  if (ptwPorts.size > 0) {
    val ptw = Module(new DummyPTW(ptwPorts.size))
    ptw.io.requestors <> ptwPorts
  }

  // Uncached test ports map one-to-one onto the tile's uncached ports
  require(memPorts.size == io.uncached.size)
  if (memPorts.size > 0) {
    io.uncached <> memPorts
  }

  io.success := test.io.status.finished
}
|
||||
629
src/main/scala/groundtest/TraceGen.scala
Normal file
629
src/main/scala/groundtest/TraceGen.scala
Normal file
@@ -0,0 +1,629 @@
|
||||
// This file was originally written by Matthew Naylor, University of
|
||||
// Cambridge, based on code already present in the groundtest repo.
|
||||
//
|
||||
// This software was partly developed by the University of Cambridge
|
||||
// Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
|
||||
// ("CTSRD"), as part of the DARPA CRASH research programme.
|
||||
//
|
||||
// This software was partly developed by the University of Cambridge
|
||||
// Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
|
||||
// ("MRC2"), as part of the DARPA MRC research programme.
|
||||
//
|
||||
// This software was partly developed by the University of Cambridge
|
||||
// Computer Laboratory as part of the Rigorous Engineering of
|
||||
// Mainstream Systems (REMS) project, funded by EPSRC grant
|
||||
// EP/K008528/1.
|
||||
|
||||
package groundtest
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import uncore.devices.NTiles
|
||||
import junctions._
|
||||
import rocket._
|
||||
import scala.util.Random
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
// =======
|
||||
// Outline
|
||||
// =======
|
||||
|
||||
// Generate memory traces that result from random sequences of memory
|
||||
// operations. These traces can then be validated by an external
|
||||
// tool. A trace is a simply sequence of memory requests and
|
||||
// responses.
|
||||
|
||||
// ==========================
|
||||
// Trace-generator parameters
|
||||
// ==========================
|
||||
|
||||
// Compile-time parameters:
|
||||
//
|
||||
// * The id of the generator (there may be more than one in a
|
||||
// multi-core system).
|
||||
//
|
||||
// * The total number of generators present in the system.
|
||||
//
|
||||
// * The desired number of requests to be sent by each generator.
|
||||
//
|
||||
// * A bag of physical addresses, shared by all cores, from which an
|
||||
// address can be drawn when generating a fresh request.
|
||||
//
|
||||
// * A number of random 'extra addresses', local to each core, from
|
||||
// which an address can be drawn when generating a fresh request.
|
||||
// (This is a way to generate a wider range of addresses without having
|
||||
// to repeatedly recompile with a different address bag.)
|
||||
|
||||
// A bag of physical addresses, shared by all cores, from which an address
// can be drawn when generating a fresh request
case object AddressBag extends Field[List[BigInt]]

/** Compile-time parameters for the trace generator. */
trait HasTraceGenParams {
  implicit val p: Parameters
  val numGens = p(NTiles)               // number of generators in the system
  val numBitsInId = log2Up(numGens)
  val numReqsPerGen = p(GeneratorKey).maxRequests
  val memRespTimeout = 8192             // cycles before a request is considered lost
  val numBitsInWord = p(XLen)
  val numBytesInWord = numBitsInWord / 8
  val numBitsInWordOffset = log2Up(numBytesInWord)
  val addressBag = p(AddressBag)
  val addressBagLen = addressBag.length
  val logAddressBagLen = log2Up(addressBagLen)
  val genExtraAddrs = false             // also draw from per-core random addresses?
  val logNumExtraAddrs = 1
  val numExtraAddrs = 1 << logNumExtraAddrs
  val maxTags = 8                       // max in-flight (uniquely tagged) requests

  require(numBytesInWord * 8 == numBitsInWord)
  // The bag is indexed with logAddressBagLen bits, so its length must be a
  // power of two
  require((1 << logAddressBagLen) == addressBagLen)
}
|
||||
|
||||
// ============
|
||||
// Trace format
|
||||
// ============
|
||||
|
||||
// Let <id> denote a generator id;
|
||||
// <addr> denote an address (in hex);
|
||||
// <data> denote a value that is stored at an address;
|
||||
// <tag> denote a unique request/response id;
|
||||
// and <time> denote an integer representing a cycle-count.
|
||||
|
||||
// Each line in the trace takes one of the following formats.
|
||||
//
|
||||
// <id>: load-req <addr> #<tag> @<time>
|
||||
// <id>: load-reserve-req <addr> #<tag> @<time>
|
||||
// <id>: store-req <data> <addr> #<tag> @<time>
|
||||
// <id>: store-cond-req <data> <addr> #<tag> @<time>
|
||||
// <id>: swap-req <data> <addr> #<tag> @<time>
|
||||
// <id>: resp <data> #<tag> @<time>
|
||||
// <id>: fence-req @<time>
|
||||
// <id>: fence-resp @<time>
|
||||
|
||||
// NOTE: The (address, value) pair of every generated store is unique,
|
||||
// i.e. the same value is never written to the same address twice.
|
||||
// This aids trace validation.
|
||||
|
||||
// ============
|
||||
// Random seeds
|
||||
// ============
|
||||
|
||||
// The generator employs "uninitialised registers" to seed its PRNGs;
|
||||
// these are randomly initialised by the C++ backend. This means that
|
||||
// the "-s" command-line argument to the Rocket emulator can be used
|
||||
// to generate new traces, or to replay specific ones.
|
||||
|
||||
// ===========
|
||||
// Tag manager
|
||||
// ===========
|
||||
|
||||
// This is used to obtain unique tags for memory requests: each
|
||||
// request must carry a unique tag since responses can come back
|
||||
// out-of-order.
|
||||
//
|
||||
// The tag manager can be viewed as a set of tags. The user can take
|
||||
// a tag out of the set (if there is one available) and later put it
|
||||
// back.
|
||||
|
||||
/** Hands out unique tags for in-flight memory requests and accepts them
  * back when the matching response arrives. Behaves as a set of tags:
  * take one out (if available), put it back later. */
class TagMan(val logNumTags : Int) extends Module {
  val io = new Bundle {
    // Is there a tag available?
    val available = Bool(OUTPUT)
    // If so, which one?
    val tagOut = UInt(OUTPUT, logNumTags)
    // User pulses this to take the currently available tag
    val take = Bool(INPUT)
    // User pulses this to put a tag back
    val put = Bool(INPUT)
    // And the tag put back is
    val tagIn = UInt(INPUT, logNumTags)
  }

  // Total number of tags available
  val numTags = 1 << logNumTags

  // For each tag, record whether or not it is in use
  val inUse = List.fill(numTags)(Reg(init = Bool(false)))

  // Mapping from each tag to its 'inUse' bit
  val inUseMap = (0 to numTags-1).map(i => UInt(i)).zip(inUse)

  // Next tag to offer
  val nextTag = Reg(init = UInt(0, logNumTags))
  io.tagOut := nextTag

  // Is the next tag available?
  io.available := ~MuxLookup(nextTag, Bool(true), inUseMap)

  // When user takes a tag: mark it in use and advance to the next candidate
  when (io.take) {
    for ((i, b) <- inUseMap) {
      when (i === nextTag) { b := Bool(true) }
    }
    nextTag := nextTag + UInt(1)
  }

  // When user puts a tag back: clear its in-use bit
  when (io.put) {
    for ((i, b) <- inUseMap) {
      when (i === io.tagIn) { b := Bool(false) }
    }
  }
}
|
||||
|
||||
// ===============
|
||||
// Trace generator
|
||||
// ===============
|
||||
|
||||
/** Pseudo-random memory-trace generator for one hardware thread.
  *
  * Emits a randomised stream of loads, stores, atomic swaps, LR/SC pairs,
  * fences and delays to the L1 data cache through io.mem, printing every
  * request and response with a tag and a timestamp so an external checker
  * can validate the observed memory behaviour.
  *
  * @param id unique identifier of this generator; printed as the thread
  *           id on every trace line and mixed into generated store data
  */
class TraceGenerator(id: Int)
    (implicit p: Parameters) extends L1HellaCacheModule()(p)
    with HasTraceGenParams {
  val io = new Bundle {
    // NOTE(review): driven constantly low at the bottom of this module;
    // completion appears to be signalled via the FINISHED printf — confirm.
    val finished = Bool(OUTPUT)
    // High if any outstanding request failed to receive a response in time
    val timeout = Bool(OUTPUT)
    // Port to the L1 data cache
    val mem = new HellaCacheIO
  }

  // Per-tag watchdog: started on each request, stopped by the matching
  // response; fires if a response takes more than 8192 cycles
  val reqTimer = Module(new Timer(8192, maxTags))
  reqTimer.io.start.valid := io.mem.req.fire()
  reqTimer.io.start.bits := io.mem.req.bits.tag
  reqTimer.io.stop.valid := io.mem.resp.valid
  reqTimer.io.stop.bits := io.mem.resp.bits.tag

  assert(!reqTimer.io.timeout.valid, s"TraceGen core ${id}: request timed out")

  // Random addresses
  // ----------------

  // Address bag, shared by all cores, taken from module parameters.
  // In addition, there is a per-core random selection of extra addresses.

  val addrHashMap = p(GlobalAddrMap)
  // NOTE(review): baseAddr is computed but never used below — confirm
  // whether the extra addresses were meant to be offset by it.
  val baseAddr = addrHashMap("mem").start + 0x01000000

  val bagOfAddrs = addressBag.map(x => UInt(x, numBitsInWord))

  // Per-core extra addresses; the registers are deliberately left
  // uninitialised so the simulator can seed each core differently
  val extraAddrs = (0 to numExtraAddrs-1).
                   map(i => Reg(UInt(width = 16)))

  // A random index into the address bag.

  val randAddrBagIndex = LCG(logAddressBagLen)

  // A random address from the address bag.

  val addrBagIndices = (0 to addressBagLen-1).
                       map(i => UInt(i, logAddressBagLen))

  val randAddrFromBag = MuxLookup(randAddrBagIndex, UInt(0),
                          addrBagIndices.zip(bagOfAddrs))

  // Random address from the address bag or the extra addresses.

  val randAddr =
    if (! genExtraAddrs) {
      randAddrFromBag
    }
    else {
      // A random index into the extra addresses.

      val randExtraAddrIndex = LCG(logNumExtraAddrs)

      // A random address from the extra addresses.

      val extraAddrIndices = (0 to numExtraAddrs-1).
                             map(i => UInt(i, logNumExtraAddrs))

      // Low 3 bits forced to zero: extra addresses are 8-byte aligned
      val randAddrFromExtra = Cat(UInt(0),
            MuxLookup(randExtraAddrIndex, UInt(0),
              extraAddrIndices.zip(extraAddrs)), UInt(0, 3))

      Frequency(List(
        (1, randAddrFromBag),
        (1, randAddrFromExtra)))
    }

  // Random opcodes
  // --------------

  // Generate random opcodes for memory operations according to the
  // given frequency distribution.

  // Opcodes
  val (opNop :: opLoad :: opStore ::
       opFence :: opLRSC :: opSwap ::
       opDelay :: Nil) = Enum(Bits(), 7)

  // Distribution specified as a list of (frequency,value) pairs.
  // NOTE: frequencies must sum to a power of two.

  val randOp = Frequency(List(
    (10, opLoad),
    (10, opStore),
    (4,  opFence),
    (3,  opLRSC),
    (3,  opSwap),
    (2,  opDelay)))

  // Request/response tags
  // ---------------------

  // Responses may come back out-of-order.  Each request and response
  // therefore contains a unique 7-bit identifier, referred to as a
  // "tag", used to match each response with its corresponding request.

  // Create a tag manager giving out unique 3-bit tags
  val tagMan = Module(new TagMan(log2Ceil(maxTags)))

  // Default inputs
  tagMan.io.take := Bool(false);
  tagMan.io.put := Bool(false);
  tagMan.io.tagIn := UInt(0);

  // Cycle counter
  // -------------

  // 32-bit cycle count used to record send-times of requests and
  // receive-times of responses.

  val cycleCount = Reg(init = UInt(0, 32))
  cycleCount := cycleCount + UInt(1);

  // Delay timer
  // -----------

  // Used to implement the delay operation and to insert random
  // delays between load-reserve and store-conditional commands.

  // A 16-bit timer is plenty
  val delayTimer = Module(new DynamicTimer(16))

  // Used to generate a random delay period
  val randDelayBase = LCG16()

  // Random delay period: usually small, occasionally big
  val randDelay = Frequency(List(
    (14, UInt(0, 13) ## randDelayBase(2, 0)),
    (2,  UInt(0, 11) ## randDelayBase(5, 0))))

  // Default inputs
  delayTimer.io.start := Bool(false)
  delayTimer.io.period := randDelay
  delayTimer.io.stop := Bool(false)

  // Operation dispatch
  // ------------------

  // Hardware thread id
  val tid = UInt(id, numBitsInId)

  // Request & response count
  val reqCount = Reg(init = UInt(0, 32))
  val respCount = Reg(init = UInt(0, 32))

  // Current operation being executed
  val currentOp = Reg(init = opNop)

  // If larger than 0, a multi-cycle operation is in progress.
  // Value indicates stage of progress.
  val opInProgress = Reg(init = UInt(0, 2))

  // Indicate when a fresh request is to be sent
  val sendFreshReq = Wire(Bool())
  sendFreshReq := Bool(false)

  // Used to generate unique data values
  val nextData = Reg(init = UInt(1, numBitsInWord-numBitsInId))

  // Registers for all the interesting parts of a request
  val reqValid = Reg(init = Bool(false))
  val reqAddr  = Reg(init = UInt(0, numBitsInWord))
  val reqData  = Reg(init = UInt(0, numBitsInWord))
  val reqCmd   = Reg(init = UInt(0, 5))
  val reqTag   = Reg(init = UInt(0, 7))

  // Condition on being allowed to send a fresh request: the previous
  // request slot must be empty (or draining this cycle) and a tag free
  val canSendFreshReq = (!reqValid || io.mem.req.fire()) &&
                        tagMan.io.available

  // Operation dispatch
  when (reqCount < UInt(numReqsPerGen)) {

    // No-op
    when (currentOp === opNop) {
      // Move on to a new operation
      currentOp := randOp
    }

    // Fence
    // NOTE(review): the fence is modelled purely by draining all
    // outstanding responses; no request is sent to the cache.
    when (currentOp === opFence) {
      when (opInProgress === UInt(0) && !reqValid) {
        // Emit fence request
        printf("%d: fence-req @%d\n", tid, cycleCount)
        // Multi-cycle operation now in progress
        opInProgress := UInt(1)
      }
      // Wait until all requests have had a response
      .elsewhen (reqCount === respCount) {
        // Emit fence response
        printf("%d: fence-resp @%d\n", tid, cycleCount)
        // Move on to a new operation
        currentOp := randOp
        // Operation finished
        opInProgress := UInt(0)
      }
    }

    // Delay
    when (currentOp === opDelay) {
      when (opInProgress === UInt(0)) {
        // Start timer
        delayTimer.io.start := Bool(true)
        // Multi-cycle operation now in progress
        opInProgress := UInt(1)
      }
      .elsewhen (delayTimer.io.timeout) {
        // Move on to a new operation
        currentOp := randOp
        // Operation finished
        opInProgress := UInt(0)
      }
    }

    // Load, store, or atomic swap
    when (currentOp === opLoad ||
          currentOp === opStore ||
          currentOp === opSwap) {
      when (canSendFreshReq) {
        // Set address
        reqAddr := randAddr
        // Set command
        when (currentOp === opLoad) {
          reqCmd := M_XRD
        } .elsewhen (currentOp === opStore) {
          reqCmd := M_XWR
        } .elsewhen (currentOp === opSwap) {
          reqCmd := M_XA_SWAP
        }
        // Send request
        sendFreshReq := Bool(true)
        // Move on to a new operation
        currentOp := randOp
      }
    }

    // Load-reserve and store-conditional
    // First issue an LR, then delay, then issue an SC
    when (currentOp === opLRSC) {
      // LR request has not yet been sent
      when (opInProgress === UInt(0)) {
        when (canSendFreshReq) {
          // Set address and command
          reqAddr := randAddr
          reqCmd := M_XLR
          // Send request
          sendFreshReq := Bool(true)
          // Multi-cycle operation now in progress
          opInProgress := UInt(1)
        }
      }
      // LR request has been sent, start delay timer
      when (opInProgress === UInt(1)) {
        // Start timer
        delayTimer.io.start := Bool(true)
        // Indicate that delay has started
        opInProgress := UInt(2)
      }
      // Delay in progress
      when (opInProgress === UInt(2)) {
        when (delayTimer.io.timeout) {
          // Delay finished
          opInProgress := UInt(3)
        }
      }
      // Delay finished, send SC request
      when (opInProgress === UInt(3)) {
        when (canSendFreshReq) {
          // Set command, but leave address
          // i.e. use same address as LR did
          reqCmd := M_XSC
          // Send request
          sendFreshReq := Bool(true)
          // Multi-cycle operation finished
          opInProgress := UInt(0)
          // Move on to a new operation
          currentOp := randOp
        }
      }
    }
  }

  // Sending of requests
  // -------------------

  when (sendFreshReq) {
    // Grab a unique tag for the request
    reqTag := tagMan.io.tagOut
    tagMan.io.take := Bool(true)
    // Fill in unique data
    reqData := Cat(nextData, tid)
    nextData := nextData + UInt(1)
    // Request is good to go!
    reqValid := Bool(true)
    // Increment request count
    reqCount := reqCount + UInt(1)
  }
  .elsewhen (io.mem.req.fire()) {
    // Request has been sent and there is no new request ready
    reqValid := Bool(false)
  }

  // Wire up interface to memory
  io.mem.req.valid     := reqValid
  io.mem.req.bits.addr := reqAddr
  io.mem.req.bits.data := reqData
  io.mem.req.bits.typ  := UInt(log2Ceil(numBytesInWord))
  io.mem.req.bits.cmd  := reqCmd
  io.mem.req.bits.tag  := reqTag

  // On cycle when request is actually sent, print it
  when (io.mem.req.fire()) {
    // Short-hand for address
    val addr = io.mem.req.bits.addr
    // Print thread id
    printf("%d:", tid)
    // Print command
    when (reqCmd === M_XRD) {
      printf(" load-req 0x%x", addr)
    }
    when (reqCmd === M_XLR) {
      printf(" load-reserve-req 0x%x", addr)
    }
    when (reqCmd === M_XWR) {
      printf(" store-req %d 0x%x", reqData, addr)
    }
    when (reqCmd === M_XSC) {
      printf(" store-cond-req %d 0x%x", reqData, addr)
    }
    when (reqCmd === M_XA_SWAP) {
      printf(" swap-req %d 0x%x", reqData, addr)
    }
    // Print tag
    printf(" #%d", reqTag)
    // Print time
    printf(" @%d\n", cycleCount)
  }

  // Handling of responses
  // ---------------------

  // When a response is received
  when (io.mem.resp.valid) {
    // Put tag back in tag set
    tagMan.io.tagIn := io.mem.resp.bits.tag
    tagMan.io.put := Bool(true)
    // Print response
    printf("%d: resp %d #%d @%d\n", tid,
      io.mem.resp.bits.data, io.mem.resp.bits.tag, cycleCount)
    // Increment response count
    respCount := respCount + UInt(1)
  }

  // Termination condition
  // ---------------------

  val done = reqCount  === UInt(numReqsPerGen) &&
             respCount === UInt(numReqsPerGen)

  // One-cycle pulse on the rising edge of 'done'
  val donePulse = done && !Reg(init = Bool(false), next = done)

  // Emit that this thread has completed
  // NOTE(review): prints the total generator count (numGens), not this
  // thread's id — presumably parsed by an external checker; confirm.
  when (donePulse) {
    printf(s"FINISHED ${numGens}\n")
  }

  io.finished := Bool(false)
  io.timeout := reqTimer.io.timeout.valid
}
|
||||
|
||||
/** Generates background uncached TileLink traffic — randomly chosen Gets
  * and zero-write-mask Puts to block addresses from the shared address
  * bag, separated by random gaps — to stress the memory system while the
  * trace generator runs.  Stops issuing once io.finished is asserted.
  */
class NoiseGenerator(implicit val p: Parameters) extends Module
    with HasTraceGenParams
    with HasTileLinkParameters {
  val io = new Bundle {
    val mem = new ClientUncachedTileLinkIO
    // Asserted by the trace generator when the test is complete
    val finished = Bool(INPUT)
  }

  // Bitmap of free transaction ids (1 = free); a bit is cleared when an
  // acquire using that id fires and set again when its grant returns.
  // NOTE(review): the bitmap width is idBits, not (1 << idBits) — confirm
  // this matches the number of distinct client transaction ids.
  val idBits = tlClientXactIdBits
  val xact_id_free = Reg(UInt(width = idBits), init = ~UInt(0, idBits))
  val xact_id_onehot = PriorityEncoderOH(xact_id_free)

  // Random-length pause between successive acquires
  val timer = Module(new DynamicTimer(8))
  timer.io.start := io.mem.acquire.fire()
  timer.io.period := LCG(8, io.mem.acquire.fire())
  timer.io.stop := Bool(false)

  val s_start :: s_send :: s_wait :: s_done :: Nil = Enum(Bits(), 4)
  val state = Reg(init = s_start)

  // start -> send; send -> wait on each acquire; wait -> send when the
  // pause timer expires, or -> done when the test has finished
  when (state === s_start) { state := s_send }
  when (io.mem.acquire.fire()) { state := s_wait }
  when (state === s_wait) {
    when (timer.io.timeout) { state := s_send }
    when (io.finished) { state := s_done }
  }

  // Id chosen for the next acquire, and id of the incoming grant
  val acq_id = OHToUInt(xact_id_onehot)
  val gnt_id = io.mem.grant.bits.client_xact_id

  // Clear the bit of a fired acquire, set the bit of a fired grant
  xact_id_free := (xact_id_free &
    ~Mux(io.mem.acquire.fire(), xact_id_onehot, UInt(0))) |
    Mux(io.mem.grant.fire(), UIntToOH(gnt_id), UInt(0))

  // Pick a random block address from the bag, a random beat within the
  // block, and randomly choose between a Get and a Put
  val tlBlockOffset = tlBeatAddrBits + tlByteAddrBits
  val addr_idx = LCG(logAddressBagLen, io.mem.acquire.fire())
  val addr_bag = Vec(addressBag.map(
    addr => UInt(addr >> tlBlockOffset, tlBlockAddrBits)))
  val addr_block = addr_bag(addr_idx)
  val addr_beat = LCG(tlBeatAddrBits, io.mem.acquire.fire())
  val acq_select = LCG(1, io.mem.acquire.fire())

  val get_acquire = Get(
    client_xact_id = acq_id,
    addr_block = addr_block,
    addr_beat = addr_beat)
  // Put with an all-zero write mask: exercises the write path without
  // actually modifying memory contents
  val put_acquire = Put(
    client_xact_id = acq_id,
    addr_block = addr_block,
    addr_beat = addr_beat,
    data = UInt(0),
    wmask = Some(UInt(0)))

  // Only issue while in s_send and at least one transaction id is free
  io.mem.acquire.valid := (state === s_send) && xact_id_free.orR
  io.mem.acquire.bits := Mux(acq_select(0), get_acquire, put_acquire)
  // Accept a grant only for an id we actually have outstanding
  io.mem.grant.ready := !xact_id_free(gnt_id)
}
|
||||
|
||||
// =======================
|
||||
// Trace-generator wrapper
|
||||
// =======================
|
||||
|
||||
/** Ground-test wrapper: hooks a TraceGenerator up to the single cached
  * port and, when an uncached port is present, pairs it with a
  * NoiseGenerator that runs until the tracer completes.
  */
class GroundTestTraceGenerator(implicit p: Parameters)
    extends GroundTest()(p) with HasTraceGenParams {

  // This wrapper drives exactly one cached port and at most one
  // uncached port.
  require(io.cache.size == 1)
  require(io.mem.size <= 1)

  // The trace generator proper owns the cached memory port.
  val tracer = Module(new TraceGenerator(p(GroundTestId)))
  io.cache.head <> tracer.io.mem

  // If an uncached port exists, saturate it with background noise
  // traffic until the tracer reports completion.
  if (io.mem.size == 1) {
    val noiser = Module(new NoiseGenerator)
    io.mem.head <> noiser.io.mem
    noiser.io.finished := tracer.io.finished
  }

  // Forward the tracer's status; this test never flags an error itself.
  io.status.finished := tracer.io.finished
  io.status.timeout.valid := tracer.io.timeout
  io.status.timeout.bits := UInt(0)
  io.status.error.valid := Bool(false)
}
|
||||
194
src/main/scala/groundtest/Util.scala
Normal file
194
src/main/scala/groundtest/Util.scala
Normal file
@@ -0,0 +1,194 @@
|
||||
package groundtest
|
||||
|
||||
import Chisel._
|
||||
|
||||
// =============
|
||||
// Dynamic timer
|
||||
// =============
|
||||
|
||||
// Timer with a dynamically-settable period.
|
||||
|
||||
/** Countdown timer whose period is chosen at run time.
  *
  * Pulsing io.start loads io.period and begins counting down; io.timeout
  * goes high for the cycle on which the count reaches zero while still
  * active.  io.stop cancels an in-flight countdown.
  *
  * @param w width of the period/count in bits
  */
class DynamicTimer(w: Int) extends Module {
  val io = new Bundle {
    val start   = Bool(INPUT)
    val period  = UInt(INPUT, w)
    val stop    = Bool(INPUT)
    val timeout = Bool(OUTPUT)
  }

  // Cycles left before expiry, and whether a countdown is in flight
  val remaining = Reg(init = UInt(0, w))
  val running   = Reg(init = Bool(false))

  // start has priority over stop; expiry or stop deactivates the timer
  when (io.start) {
    remaining := io.period
    running := Bool(true)
  } .elsewhen (io.stop || remaining === UInt(0)) {
    running := Bool(false)
  } .elsewhen (running) {
    remaining := remaining - UInt(1)
  }

  io.timeout := remaining === UInt(0) && running
}
|
||||
|
||||
// ============
|
||||
// LCG16 module
|
||||
// ============
|
||||
|
||||
// A 16-bit psuedo-random generator based on a linear conguential
|
||||
// generator (LCG). The state is stored in an unitialised register.
|
||||
// When using the C++ backend, it is straigtforward to arrange a
|
||||
// random initial value for each uninitialised register, effectively
|
||||
// seeding each LCG16 instance with a different seed.
|
||||
|
||||
/** 16-bit pseudo-random source built from a 32-bit linear congruential
  * generator.  The state register is deliberately left uninitialised:
  * with the C++ backend each uninitialised register gets a random
  * initial value, effectively giving every instance its own seed.
  */
class LCG16 extends Module {
  val io = new Bundle {
    val out = UInt(OUTPUT, 16)
    val inc = Bool(INPUT)
  }

  // Uninitialised 32-bit LCG state (see note above)
  val lcgState = Reg(UInt(width = 32))

  // Advance only when requested, using the classic ANSI-C LCG constants
  when (io.inc) {
    lcgState := lcgState * UInt(1103515245, 32) + UInt(12345, 32)
  }

  // Expose the middle bits of the state as the output
  io.out := lcgState(30, 15)
}
|
||||
|
||||
// ==========
|
||||
// LCG module
|
||||
// ==========
|
||||
|
||||
// An n-bit psuedo-random generator made from many instances of a
|
||||
// 16-bit LCG. Parameter 'width' must be larger than 0.
|
||||
|
||||
/** An arbitrary-width pseudo-random source made by concatenating as many
  * 16-bit LCGs as needed to cover w bits.
  *
  * @param w output width in bits; must be positive
  */
class LCG(val w: Int) extends Module {
  val io = new Bundle {
    val out = UInt(OUTPUT, w)
    val inc = Bool(INPUT)
  }
  require(w > 0)

  // Number of 16-bit generators needed to cover w bits (rounded up)
  val numLCG16s : Int = (w+15)/16

  // All generators step together on io.inc; their outputs are glued
  // into one wide word (extra high bits are simply truncated by the
  // width of io.out)
  val outs = List.fill(numLCG16s)(LCG16(io.inc))
  io.out := Cat(outs)
}
|
||||
|
||||
/** Convenience constructor: instantiate a fresh LCG16 and return its
  * output, stepping it whenever `inc` is high (always, by default). */
object LCG16 {
  def apply(inc: Bool = Bool(true)): UInt = {
    val gen = Module(new LCG16)
    gen.io.inc := inc
    gen.io.out
  }
}
|
||||
|
||||
/** Convenience constructor: instantiate a fresh w-bit LCG and return its
  * output, stepping it whenever `inc` is high (always, by default). */
object LCG {
  def apply(w: Int, inc: Bool = Bool(true)): UInt = {
    val gen = Module(new LCG(w))
    gen.io.inc := inc
    gen.io.out
  }
}
|
||||
|
||||
// ======================
|
||||
// Frequency distribution
|
||||
// ======================
|
||||
|
||||
// Given a list of (frequency, value) pairs, return a random value
|
||||
// according to the frequency distribution. The sum of the
|
||||
// frequencies in the distribution must be a power of two.
|
||||
|
||||
/** Given a list of (frequency, value) pairs, return a hardware value
  * drawn at random according to the frequency distribution.  The sum of
  * the frequencies must be a power of two, so a uniform random number of
  * log2(total) bits can be compared directly against cumulative bounds.
  *
  * Improvements over the previous version: the mutable `var count` /
  * `var select` accumulation is replaced by `scanLeft` + `foldLeft`, and
  * the non-idiomatic `return` is dropped (the last expression is the
  * result).  The generated hardware — a priority when/elsewhen chain
  * over cumulative bounds — is unchanged.
  */
object Frequency {
  def apply(dist : List[(Int, Bits)]) : Bits = {
    // Distribution must be non-empty
    require(dist.length > 0)

    // Require that the frequencies sum to a power of two
    val (freqs, vals) = dist.unzip
    val total = freqs.sum
    require(isPow2(total))

    // First item in the distribution
    val (firstFreq, firstVal) = dist.head

    // Result wire, defaulting to zero (overwritten by the chain below,
    // which is exhaustive because the bounds cover [0, total))
    val result = Wire(Bits(width = firstVal.getWidth))
    result := UInt(0)

    // Uniform random value in [0, total)
    val randVal = LCG(log2Up(total))

    // Cumulative upper bound for each entry, e.g. freqs (2, 6) -> (2, 8)
    val bounds = freqs.scanLeft(0)(_ + _).tail

    // Build the priority when/elsewhen chain: first entry seeds the
    // chain, each subsequent entry extends it with its cumulative bound
    vals.zip(bounds).tail.foldLeft(
      when (randVal < UInt(firstFreq)) { result := firstVal }
    ) { case (chain, (value, bound)) =>
      chain.elsewhen(randVal < UInt(bound)) { result := value }
    }

    result
  }
}
|
||||
|
||||
/** Merge several ValidIO channels into one: the result is valid when any
  * input is valid, and carries the bits of the first valid input. */
object ValidMux {
  def apply[T <: Data](v1: ValidIO[T], v2: ValidIO[T]*): ValidIO[T] =
    apply(v1 +: v2.toSeq)

  def apply[T <: Data](valids: Seq[ValidIO[T]]): ValidIO[T] = {
    val merged = Wire(Valid(valids.head.bits))
    // Valid if any source is valid
    merged.valid := valids.map(_.valid).reduce(_ || _)
    // Bits come from the first valid source (head's bits as fallback)
    merged.bits := MuxCase(valids.head.bits,
                           valids.map(v => (v.valid -> v.bits)))
    merged
  }
}
|
||||
|
||||
/** Fold the status reports of many ground-test units into one aggregate:
  * finished only when all are finished, and the first asserted timeout
  * or error (if any) is passed through. */
object DebugCombiner {
  def apply(debugs: Seq[GroundTestStatus]): GroundTestStatus = {
    val combined = Wire(new GroundTestStatus)
    combined.finished := debugs.map(_.finished).reduce(_ && _)
    combined.timeout  := ValidMux(debugs.map(_.timeout))
    combined.error    := ValidMux(debugs.map(_.error))
    combined
  }
}
|
||||
|
||||
/**
|
||||
* Takes in data on one decoupled interface and broadcasts it to
|
||||
* N decoupled output interfaces
|
||||
*/
|
||||
class Broadcaster[T <: Data](typ: T, n: Int) extends Module {
|
||||
val io = new Bundle {
|
||||
val in = Decoupled(typ).flip
|
||||
val out = Vec(n, Decoupled(typ))
|
||||
}
|
||||
|
||||
require (n > 0)
|
||||
|
||||
if (n == 1) {
|
||||
io.out.head <> io.in
|
||||
} else {
|
||||
val idx = Reg(init = UInt(0, log2Up(n)))
|
||||
val save = Reg(typ)
|
||||
|
||||
io.out.head.valid := idx === UInt(0) && io.in.valid
|
||||
io.out.head.bits := io.in.bits
|
||||
for (i <- 1 until n) {
|
||||
io.out(i).valid := idx === UInt(i)
|
||||
io.out(i).bits := save
|
||||
}
|
||||
io.in.ready := io.out.head.ready && idx === UInt(0)
|
||||
|
||||
when (io.in.fire()) { save := io.in.bits }
|
||||
|
||||
when (io.out(idx).fire()) {
|
||||
when (idx === UInt(n - 1)) { idx := UInt(0) }
|
||||
.otherwise { idx := idx + UInt(1) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Convenience constructor: fan a decoupled input out to n decoupled
  * consumers and return the vector of output ports. */
object Broadcaster {
  def apply[T <: Data](in: DecoupledIO[T], n: Int): Vec[DecoupledIO[T]] = {
    val bcast = Module(new Broadcaster(in.bits, n))
    bcast.io.in <> in
    bcast.io.out
  }
}
|
||||
148
src/main/scala/junctions/addrmap.scala
Normal file
148
src/main/scala/junctions/addrmap.scala
Normal file
@@ -0,0 +1,148 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package junctions
|
||||
|
||||
import Chisel._
|
||||
import cde.{Parameters, Field}
|
||||
import scala.collection.mutable.HashMap
|
||||
|
||||
// Width of a physical address, in bits
case object PAddrBits extends Field[Int]
// The system-wide memory map
case object GlobalAddrMap extends Field[AddrMap]

/** Mixin giving convenient access to the address-map configuration. */
trait HasAddrMapParameters {
  implicit val p: Parameters

  val paddrBits = p(PAddrBits)
  val addrMap = p(GlobalAddrMap)
}
|
||||
|
||||
/** Access attributes of a memory region: an AddrMapProt permission
  * bitmask plus whether the region may be cached. */
case class MemAttr(prot: Int, cacheable: Boolean = false)

/** A region of the physical address space. */
sealed abstract class MemRegion {
  def start: BigInt
  def size: BigInt
  def numSlaves: Int
  def attr: MemAttr

  /** Hardware test: does address x fall within [start, start + size)? */
  def containsAddress(x: UInt) = UInt(start) <= x && x < UInt(start + size)
}

/** A region of a given size whose base address has not yet been
  * assigned (it is rebased during AddrMap layout). */
case class MemSize(size: BigInt, attr: MemAttr) extends MemRegion {
  def start = 0
  def numSlaves = 1
}

/** A region with a fixed base address and size. */
case class MemRange(start: BigInt, size: BigInt, attr: MemAttr) extends MemRegion {
  def numSlaves = 1
}
|
||||
|
||||
/** Permission bitmask constants for memory regions (read/write/execute). */
object AddrMapProt {
  val R = 0x1
  val W = 0x2
  val X = 0x4
  val RW = R | W
  val RX = R | X
  val RWX = R | W | X
  // Width of the bitmask in bits
  val SZ = 3
}

/** Hardware view of a permission bitmask (bit order matches the
  * AddrMapProt constants: r = bit 0, w = bit 1, x = bit 2). */
class AddrMapProt extends Bundle {
  val x = Bool()
  val w = Bool()
  val r = Bool()
}

/** A named region within an address map. */
case class AddrMapEntry(name: String, region: MemRegion)
|
||||
|
||||
/** Convenience constructor for an address map from its entries. */
object AddrMap {
  def apply(elems: AddrMapEntry*): AddrMap = new AddrMap(elems)
}
|
||||
|
||||
/** A hierarchical memory map: an ordered sequence of named regions laid
  * out from a given base address.  Sub-maps are rebased recursively, and
  * flattened name lookups use colon-separated paths ("outer:inner").
  *
  * Change: removed the unused local `val align` that was computed (but
  * never read) when a region requested a fixed base address.
  *
  * @param entriesIn the named sub-regions, in layout order
  * @param start     base address of the whole map
  * @param collapse  if true, expose this map as a single slave port
  *                  instead of one port per leaf region
  */
class AddrMap(
    entriesIn: Seq[AddrMapEntry],
    val start: BigInt = BigInt(0),
    val collapse: Boolean = false) extends MemRegion {
  // Flattened name -> slave-port index and name -> region lookup tables,
  // populated while laying out the entries below
  private val slavePorts = HashMap[String, Int]()
  private val mapping = HashMap[String, MemRegion]()

  def isEmpty = entries.isEmpty
  def length = entries.size
  def numSlaves = slavePorts.size

  // Lay out all entries, computing the total size, the rebased entries,
  // and the merged attributes of the whole map
  val (size: BigInt, entries: Seq[AddrMapEntry], attr: MemAttr) = {
    var ind = 0
    var base = start
    var rebasedEntries = collection.mutable.ArrayBuffer[AddrMapEntry]()
    var prot = 0
    var cacheable = true
    for (AddrMapEntry(name, r) <- entriesIn) {
      if (r.start != 0) {
        // Region requests a fixed base: it must lie at or beyond the
        // addresses already allocated
        require(r.start >= base, s"region $name base address 0x${r.start.toString(16)} overlaps previous base 0x${base.toString(16)}")
        base = r.start
      } else {
        // Otherwise align the base up to the next multiple of the size
        base = (base + r.size - 1) / r.size * r.size
      }

      r match {
        case r: AddrMap =>
          // Recursively rebase the sub-map at the allocated address and
          // merge its lookup tables under a colon-separated prefix
          val subMap = new AddrMap(r.entries, base, r.collapse)
          rebasedEntries += AddrMapEntry(name, subMap)
          mapping += name -> subMap
          mapping ++= subMap.mapping.map { case (k, v) => s"$name:$k" -> v }
          if (r.collapse) {
            // A collapsed sub-map occupies exactly one slave port
            slavePorts += (name -> ind)
            ind += 1
          } else {
            slavePorts ++= subMap.slavePorts.map {
              case (k, v) => s"$name:$k" -> (ind + v)
            }
            ind += r.numSlaves
          }
        case _ =>
          // Leaf region: record its final (rebased) range
          val e = MemRange(base, r.size, r.attr)
          rebasedEntries += AddrMapEntry(name, e)
          mapping += name -> e
          slavePorts += name -> ind
          ind += r.numSlaves
      }

      base += r.size
      prot |= r.attr.prot
      cacheable &&= r.attr.cacheable
    }
    (base - start, rebasedEntries, MemAttr(prot, cacheable))
  }

  /** All leaf ranges in the map, sorted by start address. */
  val flatten: Seq[AddrMapEntry] = {
    mapping.toSeq.map {
      case (name, range: MemRange) => Some(AddrMapEntry(name, range))
      case _ => None
    }.flatten.sortBy(_.region.start)
  }

  def toRange: MemRange = MemRange(start, size, attr)
  def apply(name: String): MemRegion = mapping(name)
  def contains(name: String): Boolean = mapping.contains(name)
  def port(name: String): Int = slavePorts(name)
  def subMap(name: String): AddrMap = mapping(name).asInstanceOf[AddrMap]
  def isInRegion(name: String, addr: UInt): Bool = mapping(name).containsAddress(addr)

  /** Hardware test: does addr fall inside any cacheable leaf region? */
  def isCacheable(addr: UInt): Bool = {
    flatten.filter(_.region.attr.cacheable).map(
      _.region.containsAddress(addr)
    ).foldLeft(Bool(false))(_ || _)
  }

  /** Hardware test: does addr fall inside any mapped region at all? */
  def isValid(addr: UInt): Bool = {
    flatten.map(_.region.containsAddress(addr)).foldLeft(Bool(false))(_ || _)
  }

  /** Hardware lookup of the permission bits for the region containing
    * addr (all-zero if addr is unmapped). */
  def getProt(addr: UInt): AddrMapProt = {
    val protForRegion = flatten.map { entry =>
      Mux(entry.region.containsAddress(addr),
        UInt(entry.region.attr.prot, AddrMapProt.SZ), UInt(0))
    }
    new AddrMapProt().fromBits(protForRegion.reduce(_|_))
  }
}
|
||||
333
src/main/scala/junctions/atos.scala
Normal file
333
src/main/scala/junctions/atos.scala
Normal file
@@ -0,0 +1,333 @@
|
||||
package junctions
|
||||
|
||||
import Chisel._
|
||||
import scala.math.max
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
/** Widths and field offsets for the Atos serialised-AXI protocol.
  *
  * Atos packs the three Nasti request channels (AR, AW, W) into a single
  * message — a 2-bit type tag plus a union field — and the two response
  * channels (R, B) into a single response message, each rounded up to a
  * multiple of 32 bits.
  */
trait HasAtosParameters extends HasNastiParameters {
  // round up to a multiple of 32
  def roundup(n: Int) = 32 * ((n - 1) / 32 + 1)

  // The union must hold either a W beat (id+data+strb+last) or an
  // address beat (id+addr+len+size+burst), whichever is wider
  val atosUnionBits = max(
    nastiXIdBits + nastiXDataBits + nastiWStrobeBits + 1,
    nastiXIdBits + nastiXBurstBits +
      nastiXSizeBits + nastiXLenBits + nastiXAddrBits)
  val atosIdBits = nastiXIdBits
  val atosTypBits = 2
  val atosRespBits = nastiXRespBits
  val atosDataBits = nastiXDataBits

  // Field offsets within the union for address-type (AR/AW) messages
  val atosAddrOffset = atosIdBits
  val atosLenOffset = atosIdBits + nastiXAddrBits
  val atosSizeOffset = atosLenOffset + nastiXLenBits
  val atosBurstOffset = atosSizeOffset + nastiXSizeBits

  // Field offsets within the union for write-data (W) messages
  val atosDataOffset = atosIdBits
  val atosStrobeOffset = nastiXDataBits + atosIdBits
  val atosLastOffset = atosStrobeOffset + nastiWStrobeBits

  // Total message sizes in bits, bytes, and 32-bit words
  val atosRequestBits = roundup(atosTypBits + atosUnionBits)
  val atosResponseBits = roundup(atosTypBits + atosIdBits + atosRespBits + atosDataBits + 1)
  val atosRequestBytes = atosRequestBits / 8
  val atosResponseBytes = atosResponseBits / 8
  val atosRequestWords = atosRequestBytes / 4
  val atosResponseWords = atosResponseBytes / 4
}

/** Base class for Atos modules, mixing in the protocol parameters. */
abstract class AtosModule(implicit val p: Parameters)
    extends Module with HasAtosParameters
/** Base class for Atos bundles, mixing in the protocol parameters. */
abstract class AtosBundle(implicit val p: Parameters)
    extends ParameterizedBundle()(p) with HasAtosParameters
|
||||
|
||||
/** Constructors for Atos request messages. */
object AtosRequest {
  // Type tags: read address, write address, write data
  def arType = UInt("b00")
  def awType = UInt("b01")
  def wType = UInt("b10")

  /** Build a request from a type tag and a pre-packed union field. */
  def apply(typ: UInt, union: UInt)(implicit p: Parameters): AtosRequest = {
    val areq = Wire(new AtosRequest)
    areq.typ := typ
    areq.union := union
    areq
  }

  /** Pack a Nasti read-address beat (field order matches the offsets in
    * HasAtosParameters, id in the low bits). */
  def apply(ar: NastiReadAddressChannel)(implicit p: Parameters): AtosRequest =
    apply(arType, Cat(ar.burst, ar.size, ar.len, ar.addr, ar.id))

  /** Pack a Nasti write-address beat. */
  def apply(aw: NastiWriteAddressChannel)(implicit p: Parameters): AtosRequest =
    apply(awType, Cat(aw.burst, aw.size, aw.len, aw.addr, aw.id))

  /** Pack a Nasti write-data beat. */
  def apply(w: NastiWriteDataChannel)(implicit p: Parameters): AtosRequest =
    apply(wType, Cat(w.last, w.strb, w.data, w.id))
}

/** A serialised Nasti request: a 2-bit type tag plus a union whose field
  * layout depends on the tag (offsets defined in HasAtosParameters). */
class AtosRequest(implicit p: Parameters)
    extends AtosBundle()(p) with Serializable {
  val typ = UInt(width = atosTypBits)
  val union = UInt(width = atosUnionBits)

  // --- Accessors valid for address-type messages (arType/awType) ---

  def burst(dummy: Int = 0) =
    union(atosUnionBits - 1, atosBurstOffset)

  def size(dummy: Int = 0) =
    union(atosBurstOffset - 1, atosSizeOffset)

  def len(dummy: Int = 0) =
    union(atosSizeOffset - 1, atosLenOffset)

  def addr(dummy: Int = 0) =
    union(atosLenOffset - 1, atosAddrOffset)

  // Transaction id occupies the low bits for every message type
  def id(dummy: Int = 0) =
    union(atosIdBits - 1, 0)

  // --- Accessors valid for write-data messages (wType) ---

  def data(dummy: Int = 0) =
    union(atosStrobeOffset - 1, atosDataOffset)

  def strb(dummy: Int = 0) =
    union(atosLastOffset - 1, atosStrobeOffset)

  def last(dummy: Int = 0) =
    union(atosLastOffset)

  // --- Message classification helpers ---

  def has_addr(dummy: Int = 0) =
    typ === AtosRequest.arType || typ === AtosRequest.awType

  def has_data(dummy: Int = 0) =
    typ === AtosRequest.wType

  // True on the final beat of a transaction's request stream
  def is_last(dummy: Int = 0) =
    typ === AtosRequest.arType || (typ === AtosRequest.wType && last())

  def nbits: Int = atosRequestBits

  /** Number of response beats this request will generate: len+1 for a
    * read burst, one (the B response) for a write address, zero for a
    * write-data beat. */
  def resp_len(dummy: Int = 0) =
    MuxLookup(typ, UInt(0), Seq(
      AtosRequest.arType -> (len() + UInt(1)),
      AtosRequest.awType -> UInt(1)))
}
|
||||
|
||||
/** Constructors for Atos response messages. */
object AtosResponse {
  // Type tags: read data, write acknowledgement
  def rType = UInt("b00")
  def bType = UInt("b01")

  /** Build a response from its individual fields. */
  def apply(typ: UInt, id: UInt, resp: UInt, data: UInt, last: Bool)
           (implicit p: Parameters): AtosResponse = {
    val aresp = Wire(new AtosResponse)
    aresp.typ := typ
    aresp.id := id
    aresp.resp := resp
    aresp.data := data
    aresp.last := last
    aresp
  }

  /** Wrap a Nasti read-data beat. */
  def apply(r: NastiReadDataChannel)(implicit p: Parameters): AtosResponse =
    apply(rType, r.id, r.resp, r.data, r.last)

  /** Wrap a Nasti write acknowledgement (no data, never 'last'). */
  def apply(b: NastiWriteResponseChannel)(implicit p: Parameters): AtosResponse =
    apply(bType, b.id, b.resp, UInt(0), Bool(false))
}

/** A serialised Nasti response: either a read-data beat (rType) or a
  * write acknowledgement (bType). */
class AtosResponse(implicit p: Parameters)
    extends AtosBundle()(p) with Serializable {
  val typ = UInt(width = atosTypBits)
  val id = UInt(width = atosIdBits)
  val resp = UInt(width = atosRespBits)
  val last = Bool()
  val data = UInt(width = atosDataBits)

  // Only read responses carry data
  def has_data(dummy: Int = 0) = typ === AtosResponse.rType

  // B responses are single-beat; R responses end when 'last' is set
  def is_last(dummy: Int = 0) = !has_data() || last

  def nbits: Int = atosResponseBits
}

/** Master-side Atos interface: outbound requests, inbound responses. */
class AtosIO(implicit p: Parameters) extends AtosBundle()(p) {
  val req = Decoupled(new AtosRequest)
  val resp = Decoupled(new AtosResponse).flip
}
|
||||
|
||||
/** Serialises the three Nasti request channels (AR, AW, W) onto the
  * single Atos request stream.  AR has priority over AW; once an AW is
  * accepted, only W beats are forwarded until the last one is seen.
  */
class AtosRequestEncoder(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val ar = Decoupled(new NastiReadAddressChannel).flip
    val aw = Decoupled(new NastiWriteAddressChannel).flip
    val w = Decoupled(new NastiWriteDataChannel).flip
    val req = Decoupled(new AtosRequest)
  }

  // High while the W beats of an accepted write are being streamed out
  val writing = Reg(init = Bool(false))

  // Address channels are only served when no write is in flight;
  // AR wins over AW when both are valid
  io.ar.ready := !writing && io.req.ready
  io.aw.ready := !writing && !io.ar.valid && io.req.ready
  io.w.ready := writing && io.req.ready

  io.req.valid := Mux(writing, io.w.valid, io.ar.valid || io.aw.valid)
  io.req.bits := Mux(writing, AtosRequest(io.w.bits),
    Mux(io.ar.valid, AtosRequest(io.ar.bits), AtosRequest(io.aw.bits)))

  // Enter write-data mode after an AW fires; leave it on the last W beat
  when (io.aw.fire()) { writing := Bool(true) }
  when (io.w.fire() && io.w.bits.last) { writing := Bool(false) }
}
|
||||
|
||||
/** Demultiplexes the single Atos response stream back onto the two
  * Nasti response channels (B for write acks, R for read data). */
class AtosResponseDecoder(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val resp = Decoupled(new AtosResponse).flip
    val b = Decoupled(new NastiWriteResponseChannel)
    val r = Decoupled(new NastiReadDataChannel)
  }

  val is_b = io.resp.bits.typ === AtosResponse.bType
  val is_r = io.resp.bits.typ === AtosResponse.rType

  // Route write acknowledgements to B
  io.b.valid := io.resp.valid && is_b
  io.b.bits := NastiWriteResponseChannel(
    id = io.resp.bits.id,
    resp = io.resp.bits.resp)

  // Route read data to R
  io.r.valid := io.resp.valid && is_r
  io.r.bits := NastiReadDataChannel(
    id = io.resp.bits.id,
    data = io.resp.bits.data,
    last = io.resp.bits.last,
    resp = io.resp.bits.resp)

  // Consume the response once the selected channel accepts it
  io.resp.ready := (is_b && io.b.ready) || (is_r && io.r.ready)
}
|
||||
|
||||
// Client-side endpoint of an Atos link: converts a NASTI master interface
// into the Atos request/response stream pair by composing the request
// encoder and the response decoder.
class AtosClientConverter(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val nasti = (new NastiIO).flip
    val atos = new AtosIO
  }

  val encoder = Module(new AtosRequestEncoder)
  val decoder = Module(new AtosResponseDecoder)

  // Fold the three NASTI request channels into the Atos request stream
  encoder.io.ar <> io.nasti.ar
  encoder.io.aw <> io.nasti.aw
  encoder.io.w <> io.nasti.w
  io.atos.req <> encoder.io.req

  // Unfold the Atos response stream back into the NASTI response channels
  decoder.io.resp <> io.atos.resp
  io.nasti.b <> decoder.io.b
  io.nasti.r <> decoder.io.r
}
|
||||
|
||||
// Unfolds the single Atos request stream back into the three NASTI request
// channels (ar, aw, w), steering on the request's type tag.
class AtosRequestDecoder(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val req = Decoupled(new AtosRequest).flip
    val ar = Decoupled(new NastiReadAddressChannel)
    val aw = Decoupled(new NastiWriteAddressChannel)
    val w = Decoupled(new NastiWriteDataChannel)
  }

  val is_ar = io.req.bits.typ === AtosRequest.arType
  val is_aw = io.req.bits.typ === AtosRequest.awType
  val is_w = io.req.bits.typ === AtosRequest.wType

  io.ar.valid := io.req.valid && is_ar
  io.ar.bits := NastiReadAddressChannel(
    id = io.req.bits.id(),
    addr = io.req.bits.addr(),
    size = io.req.bits.size(),
    len = io.req.bits.len(),
    burst = io.req.bits.burst())

  io.aw.valid := io.req.valid && is_aw
  io.aw.bits := NastiWriteAddressChannel(
    id = io.req.bits.id(),
    addr = io.req.bits.addr(),
    size = io.req.bits.size(),
    len = io.req.bits.len(),
    burst = io.req.bits.burst())

  io.w.valid := io.req.valid && is_w
  io.w.bits := NastiWriteDataChannel(
    id = io.req.bits.id(),
    data = io.req.bits.data(),
    strb = Some(io.req.bits.strb()),
    last = io.req.bits.last())

  // Consume the request only when the output channel it selects accepts it
  io.req.ready := (io.ar.ready && is_ar) ||
    (io.aw.ready && is_aw) ||
    (io.w.ready && is_w)
}
|
||||
|
||||
// Folds the NASTI write-response (b) and read-data (r) channels into the
// single Atos response stream. Read data has priority; once a multi-beat
// read burst starts, 'locked' keeps b out until the last beat is sent so
// the burst is not interleaved with a write response.
class AtosResponseEncoder(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val b = Decoupled(new NastiWriteResponseChannel).flip
    val r = Decoupled(new NastiReadDataChannel).flip
    val resp = Decoupled(new AtosResponse)
  }

  // Set while a read burst is in flight (after a non-last r beat fires)
  val locked = Reg(init = Bool(false))

  io.resp.valid := (io.b.valid && !locked) || io.r.valid
  io.resp.bits := Mux(io.r.valid,
    AtosResponse(io.r.bits), AtosResponse(io.b.bits))

  // b must wait while a read burst is in flight or read data is available
  io.b.ready := !locked && !io.r.valid && io.resp.ready
  io.r.ready := io.resp.ready

  when (io.r.fire() && !io.r.bits.last) { locked := Bool(true) }
  when (io.r.fire() && io.r.bits.last) { locked := Bool(false) }
}
|
||||
|
||||
// Manager-side endpoint of an Atos link: converts the Atos request/response
// stream pair back into a NASTI master interface by composing the request
// decoder and the response encoder.
class AtosManagerConverter(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val atos = (new AtosIO).flip
    val nasti = new NastiIO
  }

  val decoder = Module(new AtosRequestDecoder)
  val encoder = Module(new AtosResponseEncoder)

  // Unfold the Atos request stream into the three NASTI request channels
  decoder.io.req <> io.atos.req
  io.nasti.ar <> decoder.io.ar
  io.nasti.aw <> decoder.io.aw
  io.nasti.w <> decoder.io.w

  // Fold the NASTI response channels into the Atos response stream
  io.atos.resp <> encoder.io.resp
  encoder.io.b <> io.nasti.b
  encoder.io.r <> io.nasti.r
}
|
||||
|
||||
// Bit-serialized form of an AtosIO: request and response streams carried as
// w-bit beats, plus a forwarded clock and clock-edge indicator outputs.
class AtosSerializedIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) {
  val req = Decoupled(Bits(width = w))        // w-bit beats of serialized requests
  val resp = Decoupled(Bits(width = w)).flip  // w-bit beats of serialized responses
  val clk = Bool(OUTPUT)
  val clk_edge = Bool(OUTPUT)
  override def cloneType = new AtosSerializedIO(w)(p).asInstanceOf[this.type]
}
|
||||
|
||||
// Client-side width converter: narrows full-width Atos requests down to
// w-bit beats and widens w-bit response beats back to full width.
class AtosSerdes(w: Int)(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val wide = (new AtosIO).flip
    val narrow = new AtosSerializedIO(w)
  }

  val reqSerializer = Module(new Serializer(w, new AtosRequest))
  val respDeserializer = Module(new Deserializer(w, new AtosResponse))

  // Requests: wide in, narrow out
  reqSerializer.io.in <> io.wide.req
  io.narrow.req <> reqSerializer.io.out

  // Responses: narrow in, wide out
  respDeserializer.io.in <> io.narrow.resp
  io.wide.resp <> respDeserializer.io.out
}
|
||||
|
||||
// Manager-side width converter (mirror of AtosSerdes): widens w-bit request
// beats to full width and narrows full-width responses to w-bit beats.
class AtosDesser(w: Int)(implicit p: Parameters) extends AtosModule()(p) {
  val io = new Bundle {
    val narrow = new AtosSerializedIO(w).flip
    val wide = new AtosIO
  }

  val reqDeserializer = Module(new Deserializer(w, new AtosRequest))
  val respSerializer = Module(new Serializer(w, new AtosResponse))

  // Requests: narrow in, wide out
  reqDeserializer.io.in <> io.narrow.req
  io.wide.req <> reqDeserializer.io.out

  // Responses: wide in, narrow out
  respSerializer.io.in <> io.wide.resp
  io.narrow.resp <> respSerializer.io.out
}
|
||||
150
src/main/scala/junctions/crossing.scala
Normal file
150
src/main/scala/junctions/crossing.scala
Normal file
@@ -0,0 +1,150 @@
|
||||
package junctions
|
||||
import Chisel._
|
||||
|
||||
// IO bundle for a clock-domain crossing: a decoupled stream plus optional
// clock/reset inputs for whichever side(s) run in a foreign clock domain.
// enq_sync / deq_sync select whether that side carries its own clock/reset
// ports (None means that side uses the enclosing module's implicit domain).
class Crossing[T <: Data](gen: T, enq_sync: Boolean, deq_sync: Boolean) extends Bundle {
  val enq = Decoupled(gen).flip()
  val deq = Decoupled(gen)
  val enq_clock = if (enq_sync) Some(Clock(INPUT)) else None
  val deq_clock = if (deq_sync) Some(Clock(INPUT)) else None
  val enq_reset = if (enq_sync) Some(Bool(INPUT)) else None
  val deq_reset = if (deq_sync) Some(Bool(INPUT)) else None
}
|
||||
|
||||
// Output is 1 for one cycle after any edge of 'in'
|
||||
// Output is 1 for one cycle after any edge of 'in'
object AsyncHandshakePulse {
  // 'in' is a toggle signal from another clock domain; 'sync' is the number
  // of synchronizer stages used to bring it into the local domain. The edge
  // is detected by comparing the two oldest synchronizer stages.
  def apply(in: Bool, sync: Int): Bool = {
    val syncv = RegInit(Vec.fill(sync+1){Bool(false)})
    syncv.last := in
    // Shift each stage one position toward index 0 every cycle
    (syncv.init zip syncv.tail).foreach { case (sink, source) => sink := source }
    syncv(0) =/= syncv(1)
  }
}
|
||||
|
||||
// Source (enqueue) half of an asynchronous handshake crossing. Runs entirely
// in the source clock domain supplied to the Module constructor; exchanges
// one word at a time with the sink via toggle-based push/pop signals.
class AsyncHandshakeSource[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool)
    extends Module(_clock = clock, _reset = reset) {
  val io = new Bundle {
    // These come from the source clock domain
    val enq = Decoupled(gen).flip()
    // These cross to the sink clock domain
    val bits = gen.cloneType.asOutput
    val push = Bool(OUTPUT)
    val pop = Bool(INPUT)
  }

  // ready: no word is currently in flight toward the sink
  val ready = RegInit(Bool(true))
  val bits = Reg(gen)
  // push toggles once per transferred word; the sink detects the edge
  val push = RegInit(Bool(false))

  io.enq.ready := ready
  io.bits := bits
  io.push := push

  // One-cycle pulse when the sink's pop toggle crosses into our domain
  val pop = AsyncHandshakePulse(io.pop, sync)
  // A pop can only arrive while a word is in flight
  assert (!pop || !ready)

  when (pop) {
    ready := Bool(true)
  }

  when (io.enq.fire()) {
    ready := Bool(false)
    bits := io.enq.bits
    push := !push
  }
}
|
||||
|
||||
// Sink (dequeue) half of an asynchronous handshake crossing. Runs entirely
// in the sink clock domain supplied to the Module constructor; mirror image
// of AsyncHandshakeSource.
class AsyncHandshakeSink[T <: Data](gen: T, sync: Int, clock: Clock, reset: Bool)
    extends Module(_clock = clock, _reset = reset) {
  val io = new Bundle {
    // These cross to the source clock domain
    val bits = gen.cloneType.asInput
    val push = Bool(INPUT)
    val pop = Bool(OUTPUT)
    // These go to the sink clock domain
    val deq = Decoupled(gen)
  }

  // valid: a word has been captured and not yet dequeued
  val valid = RegInit(Bool(false))
  val bits = Reg(gen)
  // pop toggles once per consumed word; the source detects the edge
  val pop = RegInit(Bool(false))

  io.deq.valid := valid
  io.deq.bits := bits
  io.pop := pop

  // One-cycle pulse when the source's push toggle crosses into our domain
  val push = AsyncHandshakePulse(io.push, sync)
  // A push can only arrive while our buffer is empty
  assert (!push || !valid)

  when (push) {
    valid := Bool(true)
    bits := io.bits
  }

  when (io.deq.fire()) {
    valid := Bool(false)
    pop := !pop
  }
}
|
||||
|
||||
// Full asynchronous handshake crossing: one word in flight at a time between
// two independent clock domains. Both sides carry their own clock/reset.
class AsyncHandshake[T <: Data](gen: T, sync: Int = 2) extends Module {
  require (sync >= 2)
  val io = new Crossing(gen, true, true)

  // One half lives in each clock domain; they exchange data via the
  // toggle-based push/pop handshake, synchronized on each side.
  val src = Module(new AsyncHandshakeSource(gen, sync, io.enq_clock.get, io.enq_reset.get))
  val snk = Module(new AsyncHandshakeSink(gen, sync, io.deq_clock.get, io.deq_reset.get))

  src.io.enq <> io.enq
  io.deq <> snk.io.deq

  // Raw cross-domain wires between the two halves
  snk.io.bits := src.io.bits
  snk.io.push := src.io.push
  src.io.pop := snk.io.pop
}
|
||||
|
||||
// Crossing whose enqueue side runs in this module's implicit clock domain
// and whose dequeue side runs in a caller-supplied 'to' domain.
class AsyncDecoupledTo[T <: Data](gen: T, depth: Int = 0, sync: Int = 2) extends Module {
  val io = new Crossing(gen, false, true)

  // !!! if depth == 0 { use Handshake } else { use AsyncFIFO }
  val xing = Module(new AsyncHandshake(gen, sync)).io

  // Enqueue side: our own clock domain
  xing.enq_clock.get := clock
  xing.enq_reset.get := reset
  xing.enq <> io.enq

  // Dequeue side: the caller-supplied 'to' domain
  xing.deq_clock.get := io.deq_clock.get
  xing.deq_reset.get := io.deq_reset.get
  io.deq <> xing.deq
}
|
||||
|
||||
object AsyncDecoupledTo {
  // source is in our clock domain, output is in the 'to' clock domain
  def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: DecoupledIO[T], depth: Int = 0, sync: Int = 2): DecoupledIO[T] = {
    val crossing = Module(new AsyncDecoupledTo(source.bits, depth, sync))
    crossing.io.deq_clock.get := to_clock
    crossing.io.deq_reset.get := to_reset
    crossing.io.enq <> source
    crossing.io.deq
  }
}
|
||||
|
||||
// Crossing whose enqueue side runs in a caller-supplied 'from' domain and
// whose dequeue side runs in this module's implicit clock domain.
class AsyncDecoupledFrom[T <: Data](gen: T, depth: Int = 0, sync: Int = 2) extends Module {
  val io = new Crossing(gen, true, false)

  // !!! if depth == 0 { use Handshake } else { use AsyncFIFO }
  val xing = Module(new AsyncHandshake(gen, sync)).io

  // Enqueue side: the caller-supplied 'from' domain
  xing.enq_clock.get := io.enq_clock.get
  xing.enq_reset.get := io.enq_reset.get
  xing.enq <> io.enq

  // Dequeue side: our own clock domain
  xing.deq_clock.get := clock
  xing.deq_reset.get := reset
  io.deq <> xing.deq
}
|
||||
|
||||
object AsyncDecoupledFrom {
  // source is in the 'from' clock domain, output is in our clock domain
  def apply[T <: Data](from_clock: Clock, from_reset: Bool, source: DecoupledIO[T], depth: Int = 0, sync: Int = 2): DecoupledIO[T] = {
    val crossing = Module(new AsyncDecoupledFrom(source.bits, depth, sync))
    crossing.io.enq_clock.get := from_clock
    crossing.io.enq_reset.get := from_reset
    crossing.io.enq <> source
    crossing.io.deq
  }
}
|
||||
549
src/main/scala/junctions/hasti.scala
Normal file
549
src/main/scala/junctions/hasti.scala
Normal file
@@ -0,0 +1,549 @@
|
||||
package junctions
|
||||
|
||||
import Chisel._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
// Constant definitions for the HASTI (AHB-lite style) bus protocol signals.
object HastiConstants
{
  // Values for htrans
  val SZ_HTRANS = 2
  val HTRANS_IDLE = UInt(0, SZ_HTRANS) // No transfer requested, not in a burst
  val HTRANS_BUSY = UInt(1, SZ_HTRANS) // No transfer requested, in a burst
  val HTRANS_NONSEQ = UInt(2, SZ_HTRANS) // First (potentially only) request in a burst
  val HTRANS_SEQ = UInt(3, SZ_HTRANS) // Following requests in a burst

  // Values for hburst
  val SZ_HBURST = 3
  val HBURST_SINGLE = UInt(0, SZ_HBURST) // Single access (no burst)
  val HBURST_INCR = UInt(1, SZ_HBURST) // Incrementing burst of arbitrary length, not crossing 1KB
  val HBURST_WRAP4 = UInt(2, SZ_HBURST) // 4-beat wrapping burst
  val HBURST_INCR4 = UInt(3, SZ_HBURST) // 4-beat incrementing burst
  val HBURST_WRAP8 = UInt(4, SZ_HBURST) // 8-beat wrapping burst
  val HBURST_INCR8 = UInt(5, SZ_HBURST) // 8-beat incrementing burst
  val HBURST_WRAP16 = UInt(6, SZ_HBURST) // 16-beat wrapping burst
  val HBURST_INCR16 = UInt(7, SZ_HBURST) // 16-beat incrementing burst

  // Values for hresp
  val SZ_HRESP = 1
  val HRESP_OKAY = UInt(0, SZ_HRESP)
  val HRESP_ERROR = UInt(1, SZ_HRESP)

  // Values for hsize are identical to TileLink MT_SZ
  // ie: 8*2^SZ_HSIZE bit transfers
  val SZ_HSIZE = 3

  // Values for hprot (a bitmask)
  val SZ_HPROT = 4
  def HPROT_DATA = UInt("b0001") // Data access or Opcode fetch
  def HPROT_PRIVILEGED = UInt("b0010") // Privileged or User access
  def HPROT_BUFFERABLE = UInt("b0100") // Bufferable or non-bufferable
  def HPROT_CACHEABLE = UInt("b1000") // Cacheable or non-cacheable

  // Replicate 'valid' across the width of 'b' and AND it in: passes b
  // through unchanged when valid is high, forces zero otherwise.
  def dgate(valid: Bool, b: UInt) = Fill(b.getWidth, valid) & b
}
|
||||
|
||||
import HastiConstants._
|
||||
|
||||
// Width configuration for one HASTI bus instance.
case class HastiParameters(dataBits: Int, addrBits: Int)
// Config field naming the HASTI bus instance whose parameters to look up
case object HastiId extends Field[String]
// Config field mapping a bus-instance name to its HastiParameters
case class HastiKey(id: String) extends Field[HastiParameters]

// Mix-in exposing the widths of the HASTI instance selected by HastiId.
trait HasHastiParameters {
  implicit val p: Parameters
  val hastiParams = p(HastiKey(p(HastiId)))
  val hastiAddrBits = hastiParams.addrBits
  val hastiDataBits = hastiParams.dataBits
  val hastiDataBytes = hastiDataBits/8
  val hastiAlignment = log2Ceil(hastiDataBytes)  // log2 of the data bus width in bytes
}

// Base classes for HASTI modules/bundles, pre-mixed with the parameters trait
abstract class HastiModule(implicit val p: Parameters) extends Module
  with HasHastiParameters
abstract class HastiBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasHastiParameters
|
||||
|
||||
// HASTI (AHB-lite style) master port; directions from the master's view.
class HastiMasterIO(implicit p: Parameters) extends HastiBundle()(p) {
  val htrans = UInt(OUTPUT, SZ_HTRANS)  // transfer type (IDLE/BUSY/NONSEQ/SEQ)
  val hmastlock = Bool(OUTPUT)          // request exclusive (locked) bus access
  val haddr = UInt(OUTPUT, hastiAddrBits)
  val hwrite = Bool(OUTPUT)             // high = write, low = read
  val hburst = UInt(OUTPUT, SZ_HBURST)  // burst kind (see HBURST_* constants)
  val hsize = UInt(OUTPUT, SZ_HSIZE)    // transfer size: 8*2^hsize bits
  val hprot = UInt(OUTPUT, SZ_HPROT)    // protection/attribute bitmask

  val hwdata = Bits(OUTPUT, hastiDataBits)
  val hrdata = Bits(INPUT, hastiDataBits)

  val hready = Bool(INPUT)              // slave completes the current phase
  val hresp = UInt(INPUT, SZ_HRESP)

  def isNSeq(dummy:Int=0) = htrans === HTRANS_NONSEQ // SEQ does not start a NEW request
  def isHold(dummy:Int=0) = htrans === HTRANS_BUSY || htrans === HTRANS_SEQ
  def isIdle(dummy:Int=0) = htrans === HTRANS_IDLE || htrans === HTRANS_BUSY
}
|
||||
|
||||
// HASTI (AHB-lite style) slave port; directions from the slave's view.
// Same signals as HastiMasterIO (mirrored) plus the hsel select input.
class HastiSlaveIO(implicit p: Parameters) extends HastiBundle()(p) {
  val htrans = UInt(INPUT, SZ_HTRANS)   // transfer type (IDLE/BUSY/NONSEQ/SEQ)
  val hmastlock = Bool(INPUT)
  val haddr = UInt(INPUT, hastiAddrBits)
  val hwrite = Bool(INPUT)              // high = write, low = read
  val hburst = UInt(INPUT, SZ_HBURST)
  val hsize = UInt(INPUT, SZ_HSIZE)     // transfer size: 8*2^hsize bits
  val hprot = UInt(INPUT, SZ_HPROT)

  val hwdata = Bits(INPUT, hastiDataBits)
  val hrdata = Bits(OUTPUT, hastiDataBits)

  val hsel = Bool(INPUT)                // decoder selected this slave
  val hready = Bool(OUTPUT)             // slave completes the current phase
  val hresp = UInt(OUTPUT, SZ_HRESP)
}
|
||||
|
||||
/* A diverted master is told hready when his address phase goes nowhere.
|
||||
* In this case, we buffer his address phase request and replay it later.
|
||||
* NOTE: this must optimize to nothing when divert is constantly false.
|
||||
*/
|
||||
// Buffers one diverted address phase from a master and replays it toward the
// slave side once the downstream port is free (see the comment above: a
// diverted master is told hready even though his request went nowhere).
class MasterDiversion(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val in = (new HastiMasterIO).flip
    val out = (new HastiMasterIO)
    val divert = Bool(INPUT)
  }

  // One-deep replay buffer holding the stolen address phase
  val full = Reg(init = Bool(false))
  val buffer = Reg(new HastiMasterIO)

  when (io.out.hready) {
    full := Bool(false)
  }
  // NOTE: this 'when' is deliberately after the drain above — Chisel
  // last-connect semantics make a same-cycle divert win over the drain.
  when (io.divert) {
    full := Bool(true)
    buffer := io.in
  }

  // If the master is diverted, he must also have been told hready
  assert (!io.divert || io.in.hready,
    "Diverted but not ready");

  // Replay the request we diverted
  io.out.htrans := Mux(full, buffer.htrans, io.in.htrans)
  io.out.hmastlock := Mux(full, buffer.hmastlock, io.in.hmastlock)
  io.out.haddr := Mux(full, buffer.haddr, io.in.haddr)
  io.out.hwrite := Mux(full, buffer.hwrite, io.in.hwrite)
  io.out.hburst := Mux(full, buffer.hburst, io.in.hburst)
  io.out.hsize := Mux(full, buffer.hsize, io.in.hsize)
  io.out.hprot := Mux(full, buffer.hprot, io.in.hprot)
  io.out.hwdata := Mux(full, buffer.hwdata, io.in.hwdata)

  // Pass slave response back
  io.in.hrdata := io.out.hrdata
  io.in.hresp := io.out.hresp
  io.in.hready := io.out.hready && !full // Block master while we steal his address phase
}
|
||||
|
||||
/* Masters with lower index have priority over higher index masters.
|
||||
* However, a lower priority master will retain control of a slave when EITHER:
|
||||
* 1. a burst is in progress (switching slaves mid-burst violates AHB-lite at slave)
|
||||
* 2. a transfer was waited (the standard forbids changing requests in this case)
|
||||
*
|
||||
* If a master raises hmastlock, it will be waited until no other master has inflight
|
||||
* requests; then, it acquires exclusive control of the crossbar until hmastlock is low.
|
||||
*
|
||||
* To implement an AHB-lite crossbar, it is important to realize that requests and
|
||||
* responses are coupled. Unlike modern bus protocols where the response data has flow
|
||||
* control independent of the request data, in AHB-lite, both flow at the same time at
|
||||
* the sole discretion of the slave via the hready signal. The address and data are
|
||||
* delivered on two back-to-back cycles, the so-called address and data phases.
|
||||
*
|
||||
* Masters can only be connected to a single slave at a time. If a master had two different
|
||||
* slave connections on the address and data phases, there would be two independent hready
|
||||
* signals. An AHB-lite slave can assume that data flows when it asserts hready. If the data
|
||||
* slave deasserts hready while the address slave asserts hready, the master is put in the
|
||||
* impossible position of being in data phase on two slaves at once. For this reason, when
|
||||
* a master issues back-to-back accesses to distinct slaves, we inject a pipeline bubble
|
||||
* between the two requests to limit the master to just a single slave at a time.
|
||||
*
|
||||
* Conversely, a slave CAN have two masters attached to it. This is unproblematic, because
|
||||
* the only signal which governs data flow is hready. Thus, both masters can be stalled
|
||||
* safely by the single slave.
|
||||
*/
|
||||
// AHB-lite crossbar connecting nMasters masters to one slave per address
// range in addressMap (see the long design-rationale comment above).
// Grant state is kept as slave-major boolean matrices for the address and
// data phases; arbitration is strict priority (lowest master index wins).
class HastiXbar(nMasters: Int, addressMap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val masters = Vec(nMasters, new HastiMasterIO).flip
    val slaves = Vec(addressMap.size, new HastiSlaveIO).flip
  }

  val nSlaves = addressMap.size

  // Setup diversions infront of each master
  val diversions = Seq.tabulate(nMasters) { m => Module(new MasterDiversion) }
  (io.masters zip diversions) foreach { case (m, d) => d.io.in <> m }

  // Handy short-hand
  val masters = diversions map (_.io.out)
  val slaves = io.slaves

  // Lock status of the crossbar (at most one master may hold the lock)
  val lockedM = Reg(init = Vec.fill(nMasters)(Bool(false)))
  val isLocked = lockedM.reduce(_ || _)

  // This matrix governs the master-slave connections in the address phase
  // It is indexed by addressPhaseGrantSM(slave)(master)
  // It is guaranteed to have at most one 'true' per column and per row
  val addressPhaseGrantSM = Wire(Vec(nSlaves, Vec(nMasters, Bool())))
  // This matrix governs the master-slave connections in the data phase
  // It is guaranteed to have at most one 'true' per column and per row
  val dataPhaseGrantSM = Reg (init = Vec.fill(nSlaves)(Vec.fill(nMasters)(Bool(false))))
  // This matrix is the union of the address and data phases.
  // It is transposed with respect to the two previous matrices.
  // It is guaranteed to contain at most one 'true' per master row.
  // However, two 'true's per slave column are permitted.
  val unionGrantMS = Vec.tabulate(nMasters) { m => Vec.tabulate(nSlaves) { s =>
    addressPhaseGrantSM(s)(m) || dataPhaseGrantSM(s)(m) } }

  // Confirm the guarantees made above (hardware assertion: at most one set bit)
  def justOnce(v: Vec[Bool]) = v.fold(Bool(false)) { case (p, v) =>
    assert (!p || !v)
    p || v
  }
  addressPhaseGrantSM foreach { s => justOnce(s) }
  unionGrantMS foreach { s => justOnce(s) }

  // Data phase follows address phase whenever the slave is ready
  (slaves zip (dataPhaseGrantSM zip addressPhaseGrantSM)) foreach { case (s, (d, a)) =>
    when (s.hready) { d := a }
  }

  // Record the grant state from the previous cycle; needed in case we hold access
  val priorAddressPhaseGrantSM = RegNext(addressPhaseGrantSM)

  // If a master says BUSY or SEQ, it is in the middle of a burst.
  // In this case, it MUST stay attached to the same slave as before.
  // Otherwise, it would violate the AHB-lite specification as seen by
  // the slave, which is guaranteed a complete burst of the promised length.
  // One case where this matters is preventing preemption of low-prio masters.
  // NOTE: this exposes a slave to bad addresses when a master is buggy
  val holdBurstM = Vec(masters map { _.isHold() })

  // Transform the burst hold requirement from master indexing to slave indexing
  // We use the previous cycle's binding because the master continues the prior burst
  val holdBurstS = Vec(priorAddressPhaseGrantSM map { m => Mux1H(m, holdBurstM) })

  // If a slave says !hready to a request, it must retain the same master next cycle.
  // The AHB-lite specification requires that a waited transfer remain unchanged.
  // If we preempted a waited master, the new master's request could potentially differ.
  val holdBusyS = RegNext(Vec(slaves map { s => !s.hready && s.hsel }))

  // Combine the above two grounds to determine if the slave retains its prior master
  val holdS = Vec((holdBurstS zip holdBusyS) map ({ case (a,b) => a||b }))

  // Determine which master addresses match which slaves
  val matchMS = Vec(masters map { m => Vec(addressMap map { afn => afn(m.haddr) }) })
  // Detect requests to nowhere; we need to allow progress in this case
  val nowhereM = Vec(matchMS map { s => !s.reduce(_ || _) })

  // Detect if we need to inject a pipeline bubble between the master requests.
  // Divert masters already granted a data phase different from next request.
  // NOTE: if only one slave, matchMS is always true => bubble always false
  //       => the diversion registers are optimized away as they are unread
  // NOTE: bubble => dataPhase => have an hready signal
  val bubbleM =
    Vec.tabulate(nMasters) { m =>
      Vec.tabulate(nSlaves) { s => dataPhaseGrantSM(s)(m) && !matchMS(m)(s) }
      .reduce(_ || _) }

  // Block any request that requires bus ownership or conflicts with isLocked
  val blockedM =
    Vec((lockedM zip masters) map { case(l, m) => !l && (isLocked || m.hmastlock) })

  // Requested access to slaves from masters (pre-arbitration)
  // NOTE: isNSeq does NOT include SEQ; thus, masters who are midburst do not
  // request access to a new slave. They stay tied to the old and do not get two.
  // NOTE: if a master was waited, it must repeat the same request as last cycle;
  // thus, it will request the same slave and not end up with two (unless buggy).
  val NSeq = masters.map(_.isNSeq())
  val requestSM = Vec.tabulate(nSlaves) { s => Vec.tabulate(nMasters) { m =>
    matchMS(m)(s) && NSeq(m) && !bubbleM(m) && !blockedM(m) } }

  // Select at most one master request per slave (lowest index = highest priority)
  val selectedRequestSM = Vec(requestSM map { m => Vec(PriorityEncoderOH(m)) })

  // Calculate new crossbar interconnect state: hold the prior binding where
  // required, otherwise adopt the newly arbitrated one
  addressPhaseGrantSM := Vec((holdS zip (priorAddressPhaseGrantSM zip selectedRequestSM))
    map { case (h, (p, r)) => Mux(h, p, r) })

  for (m <- 0 until nMasters) {
    // If the master is connected to a slave, the slave determines hready.
    // However, if no slave is connected, for progress report ready anyway, if:
    //   bad address (swallow request) OR idle (permit stupid masters to move FSM)
    val autoready = nowhereM(m) || masters(m).isIdle()
    val hready = Mux1H(unionGrantMS(m), slaves.map(_.hready ^ autoready)) ^ autoready
    masters(m).hready := hready
    // If we diverted a master, we need to absorb his address phase to replay later
    diversions(m).io.divert := (bubbleM(m) || blockedM(m)) && NSeq(m) && hready
  }

  // Master muxes (address and data phase are the same)
  (masters zip unionGrantMS) foreach { case (m, g) => {
    m.hrdata := Mux1H(g, slaves.map(_.hrdata))
    m.hresp := Mux1H(g, slaves.map(_.hresp))
  } }

  // Slave address phase muxes
  (slaves zip addressPhaseGrantSM) foreach { case (s, g) => {
    s.htrans := Mux1H(g, masters.map(_.htrans))
    s.haddr := Mux1H(g, masters.map(_.haddr))
    s.hmastlock := isLocked
    s.hwrite := Mux1H(g, masters.map(_.hwrite))
    s.hsize := Mux1H(g, masters.map(_.hsize))
    s.hburst := Mux1H(g, masters.map(_.hburst))
    s.hprot := Mux1H(g, masters.map(_.hprot))
    s.hsel := g.reduce(_ || _)
  } }

  // Slave data phase muxes
  (slaves zip dataPhaseGrantSM) foreach { case (s, g) => {
    s.hwdata := Mux1H(g, masters.map(_.hwdata))
  } }

  // When no master-slave connections are active, a master can take-over the bus
  val canLock = !addressPhaseGrantSM.map({ v => v.reduce(_ || _) }).reduce(_ || _)

  // Lowest index highest priority for lock arbitration
  val reqLock = masters.map(_.hmastlock)
  val winLock = PriorityEncoderOH(reqLock)

  // Lock arbitration: hold the lock while the winner keeps hmastlock high,
  // otherwise hand it out when the bus is quiescent
  when (isLocked) {
    lockedM := (lockedM zip reqLock) map { case (a,b) => a && b }
  } .elsewhen (canLock) {
    lockedM := winLock
  }
}
|
||||
|
||||
// Single-master bus: degenerate case of the crossbar with one master port.
class HastiBus(amap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val master = new HastiMasterIO().flip
    val slaves = Vec(amap.size, new HastiSlaveIO).flip
  }

  val xbar = Module(new HastiXbar(1, amap))
  xbar.io.masters(0) <> io.master
  xbar.io.slaves <> io.slaves
}
|
||||
|
||||
// Arbitrates n ports onto a single slave: a crossbar whose address map sends
// every address to its one slave.
class HastiSlaveMux(n: Int)(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val ins = Vec(n, new HastiSlaveIO)
    val out = new HastiSlaveIO().flip
  }

  // Address map that matches everything
  val matchAll = Seq({ (_:UInt) => Bool(true)})
  val xbar = Module(new HastiXbar(n, matchAll))
  io.ins <> xbar.io.masters
  io.out <> xbar.io.slaves(0)
}
|
||||
|
||||
// Adapts a slave-side port into a master-side port by passing signals
// straight through, except that an unselected slave port (hsel low) is
// presented to the master side as an IDLE transfer.
class HastiSlaveToMaster(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val in = new HastiSlaveIO
    val out = new HastiMasterIO
  }

  // Address-phase request, squashed to IDLE when this port is not selected
  io.out.haddr := io.in.haddr
  io.out.hwrite := io.in.hwrite
  io.out.hsize := io.in.hsize
  io.out.hburst := io.in.hburst
  io.out.hprot := io.in.hprot
  io.out.hmastlock := io.in.hmastlock
  io.out.htrans := Mux(io.in.hsel, io.in.htrans, HTRANS_IDLE)

  // Data phase passes straight through in both directions
  io.out.hwdata := io.in.hwdata
  io.in.hrdata := io.out.hrdata

  // Response signals flow back unmodified
  io.in.hready := io.out.hready
  io.in.hresp := io.out.hresp
}
|
||||
|
||||
// Bridges a NASTI slave interface onto a HASTI master port. A four-state FSM
// (idle / read burst / write burst / write response) drives the HASTI
// address phase, incrementing the address by the transfer size each beat.
// Read data returned during the data phase is staged in a 2-entry queue so
// the NASTI r channel can apply backpressure without violating AHB timing.
class HastiMasterIONastiIOConverter(implicit p: Parameters) extends HastiModule()(p)
    with HasNastiParameters {
  val io = new Bundle {
    val nasti = new NastiIO().flip
    val hasti = new HastiMasterIO
  }

  // The two protocols must agree on bus widths for this 1:1 bridge
  require(hastiAddrBits == nastiXAddrBits)
  require(hastiDataBits == nastiXDataBits)

  // Staging queue for read data (pipe = true: can enqueue while full if dequeuing)
  val r_queue = Module(new Queue(new NastiReadDataChannel, 2, pipe = true))

  val s_idle :: s_read :: s_write :: s_write_resp :: Nil = Enum(Bits(), 4)
  val state = Reg(init = s_idle)

  // Latched transaction attributes from the accepted NASTI address channel
  val addr = Reg(UInt(width = hastiAddrBits))
  val id = Reg(UInt(width = nastiXIdBits))
  val size = Reg(UInt(width = nastiXSizeBits))
  val len = Reg(UInt(width = nastiXLenBits))
  val data = Reg(UInt(width = nastiXDataBits))
  // first: next HASTI transfer starts a burst (NONSEQ rather than SEQ)
  val first = Reg(init = Bool(false))
  // A read address phase is being issued this cycle
  val is_rtrans = (state === s_read) &&
    (io.hasti.htrans === HTRANS_SEQ ||
      io.hasti.htrans === HTRANS_NONSEQ)
  // One cycle later (data phase), hrdata carries the corresponding read data
  val rvalid = RegEnable(is_rtrans, Bool(false), io.hasti.hready)

  io.nasti.aw.ready := (state === s_idle)
  io.nasti.ar.ready := (state === s_idle) && !io.nasti.aw.valid  // aw has priority
  io.nasti.w.ready := (state === s_write) && io.hasti.hready
  io.nasti.b.valid := (state === s_write_resp)
  io.nasti.b.bits := NastiWriteResponseChannel(id = id)
  io.nasti.r <> r_queue.io.deq

  // Capture read data as it comes back during the data phase
  r_queue.io.enq.valid := io.hasti.hready && rvalid
  r_queue.io.enq.bits := NastiReadDataChannel(
    id = id,
    data = io.hasti.hrdata,
    last = (len === UInt(0)))

  assert(!r_queue.io.enq.valid || r_queue.io.enq.ready,
    "NASTI -> HASTI converter queue overflow")

  // How many read requests have we not delivered a response for yet?
  val pending_count = r_queue.io.count + rvalid

  io.hasti.haddr := addr
  io.hasti.hsize := size
  io.hasti.hwrite := (state === s_write)
  io.hasti.hburst := HBURST_INCR
  io.hasti.hprot := UInt(0)
  io.hasti.hwdata := data
  io.hasti.hmastlock := Bool(false)
  // Writes issue a beat only when write data is available; reads throttle
  // with BUSY when the staging queue might not have room for more data.
  io.hasti.htrans := MuxLookup(state, HTRANS_IDLE, Seq(
    s_write -> Mux(io.nasti.w.valid,
      Mux(first, HTRANS_NONSEQ, HTRANS_SEQ),
      Mux(first, HTRANS_IDLE, HTRANS_BUSY)),
    s_read -> MuxCase(HTRANS_BUSY, Seq(
      first -> HTRANS_NONSEQ,
      (pending_count <= UInt(1)) -> HTRANS_SEQ))))

  // Accept a write address: latch attributes and start the write burst
  when (io.nasti.aw.fire()) {
    first := Bool(true)
    addr := io.nasti.aw.bits.addr
    id := io.nasti.aw.bits.id
    size := io.nasti.aw.bits.size
    state := s_write
  }

  // Accept a read address: latch attributes (including burst length)
  when (io.nasti.ar.fire()) {
    first := Bool(true)
    addr := io.nasti.ar.bits.addr
    id := io.nasti.ar.bits.id
    size := io.nasti.ar.bits.size
    len := io.nasti.ar.bits.len
    state := s_read
  }

  // Each write beat advances the address by the transfer size
  when (io.nasti.w.fire()) {
    first := Bool(false)
    addr := addr + (UInt(1) << size)
    data := io.nasti.w.bits.data
    when (io.nasti.w.bits.last) { state := s_write_resp }
  }

  when (io.nasti.b.fire()) { state := s_idle }

  // Each issued read address phase advances the address and counts down len
  when (is_rtrans && io.hasti.hready) {
    first := Bool(false)
    addr := addr + (UInt(1) << size)
    len := len - UInt(1)
    when (len === UInt(0)) { state := s_idle }
  }
}
|
||||
|
||||
/** A behavioral AHB-lite (HASTI) slave SRAM with randomized wait states,
  * for exercising masters' hready/stall handling in tests.
  *
  * @param depth number of address bits of SRAM storage exposed to the bus
  */
class HastiTestSRAM(depth: Int)(implicit p: Parameters) extends HastiModule()(p) {
  val io = new HastiSlaveIO

  // This is a test SRAM with random delays: hready is driven from an LFSR bit
  val ready = LFSR16(Bool(true))(0) // Bool(true)

  // Calculate the bitmask of which bytes are being accessed
  val mask_decode = Vec.tabulate(hastiAlignment+1) (UInt(_) <= io.hsize)
  val mask_wide = Vec.tabulate(hastiDataBytes) { i => mask_decode(log2Up(i+1)) }
  val mask_shift = if (hastiAlignment == 0) UInt(1) else
    mask_wide.asUInt() << io.haddr(hastiAlignment-1,0)

  // The request had better have been aligned! (AHB-lite requires this)
  if (hastiAlignment >= 1) {
    assert (io.htrans === HTRANS_IDLE || io.htrans === HTRANS_BUSY ||
      (io.haddr & mask_decode.asUInt()(hastiAlignment,1)) === UInt(0),
      "HASTI request not aligned")
  }

  // The mask and address during the address phase
  val a_request = io.hsel && (io.htrans === HTRANS_NONSEQ || io.htrans === HTRANS_SEQ)
  val a_mask = Wire(UInt(width = hastiDataBytes))
  val a_address = io.haddr(depth-1, hastiAlignment)
  val a_write = io.hwrite

  // for backwards compatibility with chisel2, we needed a static width in definition
  a_mask := mask_shift(hastiDataBytes-1, 0)

  // The data phase signals (registered from the address phase when ready)
  val d_read = RegEnable(a_request && !a_write, Bool(false), ready)
  val d_mask = RegEnable(a_mask, ready && a_request)
  val d_wdata = Vec.tabulate(hastiDataBytes) { i => io.hwdata(8*(i+1)-1, 8*i) }

  // AHB writes must occur during the data phase; this poses a structural
  // hazard with reads which must occur during the address phase. To solve
  // this problem, we delay the writes until there is a free cycle.
  //
  // The idea is to record the address information from address phase and
  // then as soon as possible flush the pending write. This cannot be done
  // on a cycle when there is an address phase read, but on any other cycle
  // the write will execute. In the case of reads following a write, the
  // result must bypass data from the pending write into the read if they
  // happen to have matching address.

  // Remove this once HoldUnless is in chisel3
  def holdUnless[T <: Data](in : T, enable: Bool): T = Mux(!enable, RegEnable(in, enable), in)

  // Pending write? (address/mask captured at address phase, data one cycle later)
  val p_valid = RegInit(Bool(false))
  val p_address = Reg(a_address)
  val p_mask = Reg(a_mask)
  val p_latch_d = RegNext(ready && a_request && a_write, Bool(false))
  val p_wdata = holdUnless(d_wdata, p_latch_d)

  // Use single-ported memory with byte-write enable
  val mem = SeqMem(1 << (depth-hastiAlignment), Vec(hastiDataBytes, Bits(width = 8)))

  // Decide is the SRAM port is used for reading or (potentially) writing
  val read = ready && a_request && !a_write
  // In case we are stalled, we need to hold the read data
  val d_rdata = holdUnless(mem.read(a_address, read), RegNext(read))
  // Whenever the port is not needed for reading, execute pending writes
  when (!read) {
    when (p_valid) { mem.write(p_address, p_wdata, p_mask.toBools) }
    p_valid := Bool(false)
  }

  // Record the request for later? (address-phase write: stash it as pending)
  when (ready && a_request && a_write) {
    p_valid := Bool(true)
    p_address := a_address
    p_mask := a_mask
  }

  // Does the read need to be muxed with the previous write?
  val a_bypass = a_address === p_address && p_valid
  val d_bypass = RegEnable(a_bypass, ready && a_request)

  // Mux in data from the pending write, byte lane by byte lane
  val muxdata = Vec((p_mask.toBools zip (p_wdata zip d_rdata))
    map { case (m, (p, r)) => Mux(d_bypass && m, p, r) })
  // Wipe out any data the master should not see (for testing)
  val outdata = Vec((d_mask.toBools zip muxdata)
    map { case (m, p) => Mux(d_read && ready && m, p, Bits(0)) })

  // Finally, the outputs
  io.hrdata := outdata.asUInt
  io.hready := ready
  io.hresp := HRESP_OKAY
}
|
||||
317
src/main/scala/junctions/memserdes.scala
Normal file
317
src/main/scala/junctions/memserdes.scala
Normal file
@@ -0,0 +1,317 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package junctions
|
||||
import Chisel._
|
||||
import scala.math._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
// Memory InterFace (MIF) configuration fields, looked up through CDE parameters.
case object MIFAddrBits extends Field[Int]
case object MIFDataBits extends Field[Int]
case object MIFTagBits extends Field[Int]
case object MIFDataBeats extends Field[Int]

/** Mix-in that extracts the MIF widths from the implicit parameter object. */
trait HasMIFParameters {
  implicit val p: Parameters
  val mifTagBits = p(MIFTagBits)      // width of the request/response tag
  val mifAddrBits = p(MIFAddrBits)    // width of a request address
  val mifDataBits = p(MIFDataBits)    // width of one data beat
  val mifDataBeats = p(MIFDataBeats)  // number of data beats per transaction
}
|
||||
|
||||
// Base classes for modules/bundles parameterized by the MIF widths.
abstract class MIFModule(implicit val p: Parameters) extends Module with HasMIFParameters
abstract class MIFBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasMIFParameters

// Field mix-ins shared by the MemIO channel bundles below.

/** Adds a data-beat field of width mifDataBits. */
trait HasMemData extends HasMIFParameters {
  val data = Bits(width = mifDataBits)
}

/** Adds an address field of width mifAddrBits. */
trait HasMemAddr extends HasMIFParameters {
  val addr = UInt(width = mifAddrBits)
}

/** Adds a transaction-tag field of width mifTagBits. */
trait HasMemTag extends HasMIFParameters {
  val tag = UInt(width = mifTagBits)
}
|
||||
|
||||
/** Memory request command: address + tag, plus rw (true = write). */
class MemReqCmd(implicit p: Parameters) extends MIFBundle()(p) with HasMemAddr with HasMemTag {
  val rw = Bool()
}

class MemTag(implicit p: Parameters) extends MIFBundle()(p) with HasMemTag
class MemData(implicit p: Parameters) extends MIFBundle()(p) with HasMemData
class MemResp(implicit p: Parameters) extends MIFBundle()(p) with HasMemData with HasMemTag

/** Three-channel memory interface with a ready/valid response channel. */
class MemIO(implicit p: Parameters) extends ParameterizedBundle()(p) {
  val req_cmd = Decoupled(new MemReqCmd)
  val req_data = Decoupled(new MemData)
  val resp = Decoupled(new MemResp).flip
}

/** Like MemIO, but the response channel is Valid-only (no backpressure). */
class MemPipeIO(implicit p: Parameters) extends ParameterizedBundle()(p) {
  val req_cmd = Decoupled(new MemReqCmd)
  val req_data = Decoupled(new MemData)
  val resp = Valid(new MemResp).flip
}

/** A MemIO squeezed onto a w-bit serialized request/response channel pair. */
class MemSerializedIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) {
  val req = Decoupled(Bits(width = w))
  val resp = Valid(Bits(width = w)).flip
  override def cloneType = new MemSerializedIO(w)(p).asInstanceOf[this.type]
}
|
||||
|
||||
/** Serializes wide MemIO requests onto a w-bit narrow channel and
  * deserializes w-bit narrow responses back into wide MemResp beats.
  *
  * @param w width in bits of the narrow serialized channel
  */
class MemSerdes(w: Int)(implicit p: Parameters) extends MIFModule
{
  val io = new Bundle {
    val wide = new MemIO().flip
    val narrow = new MemSerializedIO(w)
  }
  val abits = io.wide.req_cmd.bits.asUInt.getWidth   // serialized command width
  val dbits = io.wide.req_data.bits.asUInt.getWidth  // serialized write-data width
  val rbits = io.wide.resp.bits.getWidth             // serialized response width

  val out_buf = Reg(Bits())  // shift register for outgoing (request) chunks
  val in_buf = Reg(Bits())   // shift register for incoming (response) chunks

  val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(UInt(), 5)
  val state = Reg(init=s_idle)
  val send_cnt = Reg(init=UInt(0, log2Up((max(abits, dbits)+w-1)/w)))
  val data_send_cnt = Reg(init=UInt(0, log2Up(mifDataBeats)))
  // Done when the last w-bit chunk of the command (adone) or data (ddone) is accepted
  val adone = io.narrow.req.ready && send_cnt === UInt((abits-1)/w)
  val ddone = io.narrow.req.ready && send_cnt === UInt((dbits-1)/w)

  // Shift the output buffer one w-bit chunk per accepted narrow beat.
  // NOTE: the loads below override the shift when a new wide request is accepted
  // (Chisel last-connect semantics).
  when (io.narrow.req.valid && io.narrow.req.ready) {
    send_cnt := send_cnt + UInt(1)
    out_buf := out_buf >> UInt(w)
  }
  when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) {
    out_buf := io.wide.req_cmd.bits.asUInt
  }
  when (io.wide.req_data.valid && io.wide.req_data.ready) {
    out_buf := io.wide.req_data.bits.asUInt
  }

  io.wide.req_cmd.ready := state === s_idle
  io.wide.req_data.ready := state === s_write_idle
  io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data
  io.narrow.req.bits := out_buf

  // Request FSM: send command; for writes, then send each data beat in turn.
  when (state === s_idle && io.wide.req_cmd.valid) {
    state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr)
  }
  when (state === s_read_addr && adone) {
    state := s_idle
    send_cnt := UInt(0)
  }
  when (state === s_write_addr && adone) {
    state := s_write_idle
    send_cnt := UInt(0)
  }
  when (state === s_write_idle && io.wide.req_data.valid) {
    state := s_write_data
  }
  when (state === s_write_data && ddone) {
    data_send_cnt := data_send_cnt + UInt(1)
    state := Mux(data_send_cnt === UInt(mifDataBeats-1), s_idle, s_write_idle)
    send_cnt := UInt(0)
  }

  // Response path: accumulate narrow chunks, pulse resp_val after each full beat.
  val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w)))
  val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats)))
  val resp_val = Reg(init=Bool(false))

  resp_val := Bool(false)
  when (io.narrow.resp.valid) {
    recv_cnt := recv_cnt + UInt(1)
    when (recv_cnt === UInt((rbits-1)/w)) {
      recv_cnt := UInt(0)
      data_recv_cnt := data_recv_cnt + UInt(1)
      resp_val := Bool(true)
    }
    // shift new chunk in at the top, dropping the oldest w bits
    in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+w-1)/w*w-1,w))
  }

  io.wide.resp.valid := resp_val
  io.wide.resp.bits := io.wide.resp.bits.fromBits(in_buf)
}
|
||||
|
||||
/** IO for MemDesser: narrow serialized side in, wide MemIO side out. */
class MemDesserIO(w: Int)(implicit p: Parameters) extends ParameterizedBundle()(p) {
  val narrow = new MemSerializedIO(w).flip
  val wide = new MemIO
}
|
||||
|
||||
/** Deserializes a w-bit narrow request stream back into wide MemIO requests
  * and serializes wide responses onto the narrow channel.
  * Counterpart of MemSerdes, instantiated on the test-rig side.
  */
class MemDesser(w: Int)(implicit p: Parameters) extends Module // test rig side
{
  val io = new MemDesserIO(w)
  val abits = io.wide.req_cmd.bits.asUInt.getWidth   // serialized command width
  val dbits = io.wide.req_data.bits.asUInt.getWidth  // serialized write-data width
  val rbits = io.wide.resp.bits.getWidth             // serialized response width
  val mifDataBeats = p(MIFDataBeats)

  // in_buf is sized for the widest message; narrower ones must fit inside it
  require(dbits >= abits && rbits >= dbits)
  val recv_cnt = Reg(init=UInt(0, log2Up((rbits+w-1)/w)))
  val data_recv_cnt = Reg(init=UInt(0, log2Up(mifDataBeats)))
  // Last chunk of a command (adone), write-data beat (ddone), or response (rdone)
  val adone = io.narrow.req.valid && recv_cnt === UInt((abits-1)/w)
  val ddone = io.narrow.req.valid && recv_cnt === UInt((dbits-1)/w)
  val rdone = io.narrow.resp.valid && recv_cnt === UInt((rbits-1)/w)

  val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(UInt(), 5)
  val state = Reg(init=s_cmd_recv)

  // Shift register shared by the request-receive and response-send paths
  val in_buf = Reg(Bits())
  when (io.narrow.req.valid && io.narrow.req.ready || io.narrow.resp.valid) {
    recv_cnt := recv_cnt + UInt(1)
    in_buf := Cat(io.narrow.req.bits, in_buf((rbits+w-1)/w*w-1,w))
  }
  io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv

  // FSM: receive command; hand it off; for writes, stream in each data beat.
  when (state === s_cmd_recv && adone) {
    state := s_cmd
    recv_cnt := UInt(0)
  }
  when (state === s_cmd && io.wide.req_cmd.ready) {
    state := Mux(io.wide.req_cmd.bits.rw, s_data_recv, s_reply)
  }
  when (state === s_data_recv && ddone) {
    state := s_data
    recv_cnt := UInt(0)
  }
  when (state === s_data && io.wide.req_data.ready) {
    state := s_data_recv
    when (data_recv_cnt === UInt(mifDataBeats-1)) {
      state := s_cmd_recv
    }
    data_recv_cnt := data_recv_cnt + UInt(1)
  }
  when (rdone) { // state === s_reply
    when (data_recv_cnt === UInt(mifDataBeats-1)) {
      state := s_cmd_recv
    }
    recv_cnt := UInt(0)
    data_recv_cnt := data_recv_cnt + UInt(1)
  }

  // Command and data are right-justified within the rbits-sized shift register
  val req_cmd = in_buf >> UInt(((rbits+w-1)/w - (abits+w-1)/w)*w)
  io.wide.req_cmd.valid := state === s_cmd
  io.wide.req_cmd.bits := io.wide.req_cmd.bits.fromBits(req_cmd)

  io.wide.req_data.valid := state === s_data
  io.wide.req_data.bits.data := in_buf >> UInt(((rbits+w-1)/w - (dbits+w-1)/w)*w)

  // Buffer wide responses, then stream them out w bits at a time
  val dataq = Module(new Queue(new MemResp, mifDataBeats))
  dataq.io.enq <> io.wide.resp
  dataq.io.deq.ready := recv_cnt === UInt((rbits-1)/w)

  io.narrow.resp.valid := dataq.io.deq.valid
  io.narrow.resp.bits := dataq.io.deq.bits.asUInt >> (recv_cnt * UInt(w))
}
|
||||
|
||||
/** Round-robin arbiter multiplexing arbN inner MemIO ports onto one outer port.
  * The winner's index is appended to the low bits of the tag so responses can
  * be routed back; write data follows the command via a small choice queue.
  */
class MemIOArbiter(val arbN: Int)(implicit p: Parameters) extends MIFModule {
  val io = new Bundle {
    val inner = Vec(arbN, new MemIO).flip
    val outer = new MemIO
  }

  if(arbN > 1) {
    val cmd_arb = Module(new RRArbiter(new MemReqCmd, arbN))
    // remembers which inner port won each in-flight write, for data routing
    val choice_q = Module(new Queue(cmd_arb.io.chosen, 4))
    val (data_cnt, data_done) = Counter(io.outer.req_data.fire(), mifDataBeats)

    io.inner.map(_.req_cmd).zipWithIndex.zip(cmd_arb.io.in).map{ case ((req, id), arb) => {
      arb.valid := req.valid
      arb.bits := req.bits
      // append the port index to the tag so the response can be demuxed
      arb.bits.tag := Cat(req.bits.tag, UInt(id))
      req.ready := arb.ready
    }}
    // a write command may only issue if its choice can be recorded
    io.outer.req_cmd.bits := cmd_arb.io.out.bits
    io.outer.req_cmd.valid := cmd_arb.io.out.valid && choice_q.io.enq.ready
    cmd_arb.io.out.ready := io.outer.req_cmd.ready && choice_q.io.enq.ready
    choice_q.io.enq.bits := cmd_arb.io.chosen
    choice_q.io.enq.valid := cmd_arb.io.out.fire() && cmd_arb.io.out.bits.rw

    // route write data from whichever port owns the oldest recorded write
    io.outer.req_data.bits := io.inner(choice_q.io.deq.bits).req_data.bits
    io.outer.req_data.valid := io.inner(choice_q.io.deq.bits).req_data.valid && choice_q.io.deq.valid
    io.inner.map(_.req_data.ready).zipWithIndex.foreach {
      case(r, i) => r := UInt(i) === choice_q.io.deq.bits && choice_q.io.deq.valid
    }
    choice_q.io.deq.ready := data_done

    // demux responses by the port index in the tag's low bits
    io.outer.resp.ready := Bool(false)
    for (i <- 0 until arbN) {
      io.inner(i).resp.valid := Bool(false)
      when(io.outer.resp.bits.tag(log2Up(arbN)-1,0) === UInt(i)) {
        io.inner(i).resp.valid := io.outer.resp.valid
        io.outer.resp.ready := io.inner(i).resp.ready
      }
      io.inner(i).resp.bits := io.outer.resp.bits
      // strip the port index off the tag before handing it back
      io.inner(i).resp.bits.tag := io.outer.resp.bits.tag >> UInt(log2Up(arbN))
    }
  } else { io.outer <> io.inner.head }
}
|
||||
|
||||
/** Adapts a MemPipeIO master to a MemIO interface by wiring the channels
  * straight through; the response is always accepted (resp.ready is tied
  * high) since MemPipeIO responses carry no backpressure.
  */
object MemIOMemPipeIOConverter {
  def apply(in: MemPipeIO)(implicit p: Parameters): MemIO = {
    val out = Wire(new MemIO())
    // response: MemIO -> MemPipeIO (no backpressure on the pipe side)
    in.resp.valid := out.resp.valid
    in.resp.bits := out.resp.bits
    out.resp.ready := Bool(true)
    // request command: MemPipeIO -> MemIO
    out.req_cmd.valid := in.req_cmd.valid
    out.req_cmd.bits := in.req_cmd.bits
    in.req_cmd.ready := out.req_cmd.ready
    // request data: MemPipeIO -> MemIO
    out.req_data.valid := in.req_data.valid
    out.req_data.bits := in.req_data.bits
    in.req_data.ready := out.req_data.ready
    out
  }
}
|
||||
|
||||
/** Converts a MemIO master to a MemPipeIO slave. Because MemPipeIO responses
  * cannot be backpressured, read commands are only issued when enough response
  * queue space is reserved (credit counter `count`); writes pass freely.
  *
  * @param numRequests number of outstanding read requests to provision for
  */
class MemPipeIOMemIOConverter(numRequests: Int)(implicit p: Parameters) extends MIFModule {
  val io = new Bundle {
    val cpu = new MemIO().flip
    val mem = new MemPipeIO
  }

  val numEntries = numRequests * mifDataBeats
  val size = log2Down(numEntries) + 1

  // Credit counter: starts full; a read consumes mifDataBeats credits,
  // each drained response beat returns one.
  val inc = Wire(Bool())
  val dec = Wire(Bool())
  val count = Reg(init=UInt(numEntries, size))
  val watermark = count >= UInt(mifDataBeats)

  when (inc && !dec) {
    count := count + UInt(1)
  }
  when (!inc && dec) {
    count := count - UInt(mifDataBeats)
  }
  when (inc && dec) {
    count := count - UInt(mifDataBeats-1)
  }

  // writes always pass; reads require a full beat's worth of credits
  val cmdq_mask = io.cpu.req_cmd.bits.rw || watermark

  io.mem.req_cmd.valid := io.cpu.req_cmd.valid && cmdq_mask
  io.cpu.req_cmd.ready := io.mem.req_cmd.ready && cmdq_mask
  io.mem.req_cmd.bits := io.cpu.req_cmd.bits

  io.mem.req_data <> io.cpu.req_data

  // Have separate queues to allow for different mem implementations
  val resp_data_q = Module((new HellaQueue(numEntries)) { new MemData })
  resp_data_q.io.enq.valid := io.mem.resp.valid
  resp_data_q.io.enq.bits.data := io.mem.resp.bits.data

  val resp_tag_q = Module((new HellaQueue(numEntries)) { new MemTag })
  resp_tag_q.io.enq.valid := io.mem.resp.valid
  resp_tag_q.io.enq.bits.tag := io.mem.resp.bits.tag

  // present a response only when both halves are available
  io.cpu.resp.valid := resp_data_q.io.deq.valid && resp_tag_q.io.deq.valid
  io.cpu.resp.bits.data := resp_data_q.io.deq.bits.data
  io.cpu.resp.bits.tag := resp_tag_q.io.deq.bits.tag
  resp_data_q.io.deq.ready := io.cpu.resp.ready
  resp_tag_q.io.deq.ready := io.cpu.resp.ready

  inc := resp_data_q.io.deq.fire() && resp_tag_q.io.deq.fire()
  dec := io.mem.req_cmd.fire() && !io.mem.req_cmd.bits.rw
}
|
||||
737
src/main/scala/junctions/nasti.scala
Normal file
737
src/main/scala/junctions/nasti.scala
Normal file
@@ -0,0 +1,737 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package junctions
|
||||
import Chisel._
|
||||
import scala.math.max
|
||||
import scala.collection.mutable.ArraySeq
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
// NASTI is this codebase's name for its AXI4-like interconnect protocol.
case object NastiKey extends Field[NastiParameters]

/** Externally configurable NASTI widths: data bus, address, and ID. */
case class NastiParameters(dataBits: Int, addrBits: Int, idBits: Int)

/** Mix-in deriving all NASTI channel field widths from NastiKey. */
trait HasNastiParameters {
  implicit val p: Parameters
  val nastiExternal = p(NastiKey)
  val nastiXDataBits = nastiExternal.dataBits
  val nastiWStrobeBits = nastiXDataBits / 8      // one strobe bit per data byte
  val nastiXAddrBits = nastiExternal.addrBits
  val nastiWIdBits = nastiExternal.idBits
  val nastiRIdBits = nastiExternal.idBits
  val nastiXIdBits = max(nastiWIdBits, nastiRIdBits)
  val nastiXUserBits = 1
  val nastiAWUserBits = nastiXUserBits
  val nastiWUserBits = nastiXUserBits
  val nastiBUserBits = nastiXUserBits
  val nastiARUserBits = nastiXUserBits
  val nastiRUserBits = nastiXUserBits
  // Fixed AXI4 field widths
  val nastiXLenBits = 8
  val nastiXSizeBits = 3
  val nastiXBurstBits = 2
  val nastiXCacheBits = 4
  val nastiXProtBits = 3
  val nastiXQosBits = 4
  val nastiXRegionBits = 4
  val nastiXRespBits = 2

  /** Encode a power-of-two byte count as an AxSIZE value (log2 of bytes);
    * unrecognized inputs map to "b111". */
  def bytesToXSize(bytes: UInt) = MuxLookup(bytes, UInt("b111"), Array(
    UInt(1) -> UInt(0),
    UInt(2) -> UInt(1),
    UInt(4) -> UInt(2),
    UInt(8) -> UInt(3),
    UInt(16) -> UInt(4),
    UInt(32) -> UInt(5),
    UInt(64) -> UInt(6),
    UInt(128) -> UInt(7)))
}
|
||||
|
||||
// Base classes for modules/bundles parameterized by the NASTI widths.
abstract class NastiModule(implicit val p: Parameters) extends Module
  with HasNastiParameters
abstract class NastiBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasNastiParameters

// Channel direction markers: AW/W/AR flow master->slave, B/R slave->master.
abstract class NastiChannel(implicit p: Parameters) extends NastiBundle()(p)
abstract class NastiMasterToSlaveChannel(implicit p: Parameters) extends NastiChannel()(p)
abstract class NastiSlaveToMasterChannel(implicit p: Parameters) extends NastiChannel()(p)
|
||||
|
||||
/** The AXI4 address-channel metadata fields shared by AW and AR. */
trait HasNastiMetadata extends HasNastiParameters {
  val addr = UInt(width = nastiXAddrBits)
  val len = UInt(width = nastiXLenBits)      // beats per burst, minus one
  val size = UInt(width = nastiXSizeBits)    // log2(bytes per beat)
  val burst = UInt(width = nastiXBurstBits)
  val lock = Bool()
  val cache = UInt(width = nastiXCacheBits)
  val prot = UInt(width = nastiXProtBits)
  val qos = UInt(width = nastiXQosBits)
  val region = UInt(width = nastiXRegionBits)
}

/** The data-bearing fields shared by the W and R channels. */
trait HasNastiData extends HasNastiParameters {
  val data = UInt(width = nastiXDataBits)
  val last = Bool()   // marks the final beat of a burst
}
|
||||
|
||||
/** Read-only half of a NASTI port (AR request, R response). */
class NastiReadIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
  val ar = Decoupled(new NastiReadAddressChannel)
  val r = Decoupled(new NastiReadDataChannel).flip
}

/** Write-only half of a NASTI port (AW/W requests, B response). */
class NastiWriteIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
  val aw = Decoupled(new NastiWriteAddressChannel)
  val w = Decoupled(new NastiWriteDataChannel)
  val b = Decoupled(new NastiWriteResponseChannel).flip
}

/** Full five-channel NASTI port, from the master's point of view. */
class NastiIO(implicit val p: Parameters) extends ParameterizedBundle()(p) {
  val aw = Decoupled(new NastiWriteAddressChannel)
  val w = Decoupled(new NastiWriteDataChannel)
  val b = Decoupled(new NastiWriteResponseChannel).flip
  val ar = Decoupled(new NastiReadAddressChannel)
  val r = Decoupled(new NastiReadDataChannel).flip
}
|
||||
|
||||
// Concrete channel bundles. Address channels carry the burst metadata;
// response channels carry a 2-bit resp code; each channel adds its own
// id/user fields with the appropriate widths.

class NastiAddressChannel(implicit p: Parameters) extends NastiMasterToSlaveChannel()(p)
  with HasNastiMetadata

class NastiResponseChannel(implicit p: Parameters) extends NastiSlaveToMasterChannel()(p) {
  val resp = UInt(width = nastiXRespBits)
}

/** AW: write address + burst metadata. */
class NastiWriteAddressChannel(implicit p: Parameters) extends NastiAddressChannel()(p) {
  val id = UInt(width = nastiWIdBits)
  val user = UInt(width = nastiAWUserBits)
}

/** W: write data beat with per-byte strobes. */
class NastiWriteDataChannel(implicit p: Parameters) extends NastiMasterToSlaveChannel()(p)
    with HasNastiData {
  val id = UInt(width = nastiWIdBits)
  val strb = UInt(width = nastiWStrobeBits)
  val user = UInt(width = nastiWUserBits)
}

/** B: write response. */
class NastiWriteResponseChannel(implicit p: Parameters) extends NastiResponseChannel()(p) {
  val id = UInt(width = nastiWIdBits)
  val user = UInt(width = nastiBUserBits)
}

/** AR: read address + burst metadata. */
class NastiReadAddressChannel(implicit p: Parameters) extends NastiAddressChannel()(p) {
  val id = UInt(width = nastiRIdBits)
  val user = UInt(width = nastiARUserBits)
}

/** R: read data beat, which also carries a response code. */
class NastiReadDataChannel(implicit p: Parameters) extends NastiResponseChannel()(p)
    with HasNastiData {
  val id = UInt(width = nastiRIdBits)
  val user = UInt(width = nastiRUserBits)
}
|
||||
|
||||
/** AXI4 burst-type and response-code encodings. */
object NastiConstants {
  val BURST_FIXED = UInt("b00")
  val BURST_INCR = UInt("b01")
  val BURST_WRAP = UInt("b10")

  val RESP_OKAY = UInt("b00")
  val RESP_EXOKAY = UInt("b01")
  val RESP_SLVERR = UInt("b10")
  val RESP_DECERR = UInt("b11")
}
|
||||
|
||||
import NastiConstants._
|
||||
|
||||
/** Factory for AW beats. The caller supplies the transaction parameters;
  * the remaining AXI fields get fixed defaults (no lock, zero cache/prot/
  * qos/region, zero user). */
object NastiWriteAddressChannel {
  def apply(id: UInt, addr: UInt, size: UInt,
      len: UInt = UInt(0), burst: UInt = BURST_INCR)
      (implicit p: Parameters) = {
    val chan = Wire(new NastiWriteAddressChannel)
    // caller-supplied transaction parameters
    chan.id := id
    chan.addr := addr
    chan.size := size
    chan.len := len
    chan.burst := burst
    // fixed defaults for the remaining fields
    chan.lock := Bool(false)
    chan.cache := UInt("b0000")
    chan.prot := UInt("b000")
    chan.qos := UInt("b0000")
    chan.region := UInt("b0000")
    chan.user := UInt(0)
    chan
  }
}
|
||||
|
||||
/** Factory for AR beats. The caller supplies the transaction parameters;
  * all other AXI fields default to zero (and lock to false). */
object NastiReadAddressChannel {
  def apply(id: UInt, addr: UInt, size: UInt,
      len: UInt = UInt(0), burst: UInt = BURST_INCR)
      (implicit p: Parameters) = {
    val chan = Wire(new NastiReadAddressChannel)
    // caller-supplied transaction parameters
    chan.id := id
    chan.addr := addr
    chan.size := size
    chan.len := len
    chan.burst := burst
    // fixed defaults for the remaining fields
    chan.lock := Bool(false)
    chan.cache := UInt(0)
    chan.prot := UInt(0)
    chan.qos := UInt(0)
    chan.region := UInt(0)
    chan.user := UInt(0)
    chan
  }
}
|
||||
|
||||
/** Factory for W beats. When no strobe is given, every byte lane is
  * enabled (all-ones strobe). */
object NastiWriteDataChannel {
  def apply(data: UInt, strb: Option[UInt] = None,
            last: Bool = Bool(true), id: UInt = UInt(0))
           (implicit p: Parameters): NastiWriteDataChannel = {
    val chan = Wire(new NastiWriteDataChannel)
    // default strobe: all byte lanes active
    chan.strb := strb.getOrElse(Fill(chan.nastiWStrobeBits, UInt(1, 1)))
    chan.data := data
    chan.last := last
    chan.id := id
    chan.user := UInt(0)
    chan
  }
}
|
||||
|
||||
/** Factory for R beats; resp defaults to OKAY (zero). */
object NastiReadDataChannel {
  def apply(id: UInt, data: UInt, last: Bool = Bool(true), resp: UInt = UInt(0))(
      implicit p: Parameters) = {
    val chan = Wire(new NastiReadDataChannel)
    chan.id := id
    chan.data := data
    chan.last := last
    chan.resp := resp
    chan.user := UInt(0)
    chan
  }
}
|
||||
|
||||
/** Factory for B beats; resp defaults to OKAY (zero). */
object NastiWriteResponseChannel {
  def apply(id: UInt, resp: UInt = UInt(0))(implicit p: Parameters) = {
    val chan = Wire(new NastiWriteResponseChannel)
    chan.id := id
    chan.resp := resp
    chan.user := UInt(0)
    chan
  }
}
|
||||
|
||||
/** Bridges a NASTI master onto a MemIO memory port. Requires NASTI bursts
  * to exactly match the MemIO beat size and count (enforced by asserts).
  *
  * @param cacheBlockOffsetBits byte-offset bits stripped from NASTI addresses
  *                             to form MemIO block addresses
  */
class MemIONastiIOConverter(cacheBlockOffsetBits: Int)(implicit p: Parameters) extends MIFModule
    with HasNastiParameters {
  val io = new Bundle {
    val nasti = (new NastiIO).flip
    val mem = new MemIO
  }

  require(mifDataBits == nastiXDataBits, "Data sizes between LLC and MC don't agree")
  val (mif_cnt_out, mif_wrap_out) = Counter(io.mem.resp.fire(), mifDataBeats)

  assert(!io.nasti.aw.valid || io.nasti.aw.bits.size === UInt(log2Up(mifDataBits/8)),
    "Nasti data size does not match MemIO data size")
  assert(!io.nasti.ar.valid || io.nasti.ar.bits.size === UInt(log2Up(mifDataBits/8)),
    "Nasti data size does not match MemIO data size")
  assert(!io.nasti.aw.valid || io.nasti.aw.bits.len === UInt(mifDataBeats - 1),
    "Nasti length does not match number of MemIO beats")
  assert(!io.nasti.ar.valid || io.nasti.ar.bits.len === UInt(mifDataBeats - 1),
    "Nasti length does not match number of MemIO beats")

  // according to the spec, we can't send b until the last transfer on w
  val b_ok = Reg(init = Bool(true))
  when (io.nasti.aw.fire()) { b_ok := Bool(false) }
  when (io.nasti.w.fire() && io.nasti.w.bits.last) { b_ok := Bool(true) }

  // track write IDs so the B response carries the right one back
  val id_q = Module(new Queue(UInt(width = nastiWIdBits), 2))
  id_q.io.enq.valid := io.nasti.aw.valid && io.mem.req_cmd.ready
  id_q.io.enq.bits := io.nasti.aw.bits.id
  id_q.io.deq.ready := io.nasti.b.ready && b_ok

  // command channel: writes (AW) take priority over reads (AR)
  io.mem.req_cmd.bits.addr := Mux(io.nasti.aw.valid, io.nasti.aw.bits.addr, io.nasti.ar.bits.addr) >>
    UInt(cacheBlockOffsetBits)
  io.mem.req_cmd.bits.tag := Mux(io.nasti.aw.valid, io.nasti.aw.bits.id, io.nasti.ar.bits.id)
  io.mem.req_cmd.bits.rw := io.nasti.aw.valid
  io.mem.req_cmd.valid := (io.nasti.aw.valid && id_q.io.enq.ready) || io.nasti.ar.valid
  io.nasti.ar.ready := io.mem.req_cmd.ready && !io.nasti.aw.valid
  io.nasti.aw.ready := io.mem.req_cmd.ready && id_q.io.enq.ready

  io.nasti.b.valid := id_q.io.deq.valid && b_ok
  io.nasti.b.bits.id := id_q.io.deq.bits
  io.nasti.b.bits.resp := UInt(0)

  // write data: passed straight through; partial strobes are unsupported
  io.nasti.w.ready := io.mem.req_data.ready
  io.mem.req_data.valid := io.nasti.w.valid
  io.mem.req_data.bits.data := io.nasti.w.bits.data
  assert(!io.nasti.w.valid || io.nasti.w.bits.strb.andR, "MemIO must write full cache line")

  // read data: MemIO beats out; `last` asserted on the final beat of a block
  io.nasti.r.valid := io.mem.resp.valid
  io.nasti.r.bits.data := io.mem.resp.bits.data
  io.nasti.r.bits.last := mif_wrap_out
  io.nasti.r.bits.id := io.mem.resp.bits.tag
  io.nasti.r.bits.resp := UInt(0)
  io.mem.resp.ready := io.nasti.r.ready
}
|
||||
|
||||
/** IO for NastiArbiter: arbN master ports funneled into one slave port. */
class NastiArbiterIO(arbN: Int)(implicit p: Parameters) extends Bundle {
  val master = Vec(arbN, new NastiIO).flip
  val slave = new NastiIO
  override def cloneType =
    new NastiArbiterIO(arbN).asInstanceOf[this.type]
}
|
||||
|
||||
/** Arbitrate among arbN masters requesting to a single slave.
  * The winning master's index is appended to the low bits of the AXI ID so
  * R and B responses can be demultiplexed back to the right master; write
  * data is locked to the AW winner until its last beat.
  */
class NastiArbiter(val arbN: Int)(implicit p: Parameters) extends NastiModule {
  val io = new NastiArbiterIO(arbN)

  if (arbN > 1) {
    val arbIdBits = log2Up(arbN)

    val ar_arb = Module(new RRArbiter(new NastiReadAddressChannel, arbN))
    val aw_arb = Module(new RRArbiter(new NastiWriteAddressChannel, arbN))

    // low ID bits identify which master a response belongs to
    val slave_r_arb_id = io.slave.r.bits.id(arbIdBits - 1, 0)
    val slave_b_arb_id = io.slave.b.bits.id(arbIdBits - 1, 0)

    // which master currently owns the W channel, and whether its burst is done
    val w_chosen = Reg(UInt(width = arbIdBits))
    val w_done = Reg(init = Bool(true))

    when (aw_arb.io.out.fire()) {
      w_chosen := aw_arb.io.chosen
      w_done := Bool(false)
    }

    when (io.slave.w.fire() && io.slave.w.bits.last) {
      w_done := Bool(true)
    }

    for (i <- 0 until arbN) {
      val m_ar = io.master(i).ar
      val m_aw = io.master(i).aw
      val m_r = io.master(i).r
      val m_b = io.master(i).b
      val a_ar = ar_arb.io.in(i)
      val a_aw = aw_arb.io.in(i)
      val m_w = io.master(i).w

      // tag outgoing requests with the master index
      a_ar <> m_ar
      a_ar.bits.id := Cat(m_ar.bits.id, UInt(i, arbIdBits))

      a_aw <> m_aw
      a_aw.bits.id := Cat(m_aw.bits.id, UInt(i, arbIdBits))

      // route responses back, stripping the master index off the ID
      m_r.valid := io.slave.r.valid && slave_r_arb_id === UInt(i)
      m_r.bits := io.slave.r.bits
      m_r.bits.id := io.slave.r.bits.id >> UInt(arbIdBits)

      m_b.valid := io.slave.b.valid && slave_b_arb_id === UInt(i)
      m_b.bits := io.slave.b.bits
      m_b.bits.id := io.slave.b.bits.id >> UInt(arbIdBits)

      // only the current AW winner may drive write data
      m_w.ready := io.slave.w.ready && w_chosen === UInt(i) && !w_done
    }

    io.slave.r.ready := io.master(slave_r_arb_id).r.ready
    io.slave.b.ready := io.master(slave_b_arb_id).b.ready

    io.slave.w.bits := io.master(w_chosen).w.bits
    io.slave.w.valid := io.master(w_chosen).w.valid && !w_done

    io.slave.ar <> ar_arb.io.out

    // hold off the next AW until the previous write's data has finished
    io.slave.aw.bits <> aw_arb.io.out.bits
    io.slave.aw.valid := aw_arb.io.out.valid && w_done
    aw_arb.io.out.ready := io.slave.aw.ready && w_done

  } else { io.slave <> io.master.head }
}
|
||||
|
||||
/** A slave that sends a decode-error response for every request it receives.
  * Reads are answered with a full burst of RESP_DECERR beats of zero data;
  * writes have their data drained and are answered with a RESP_DECERR B beat.
  * Used behind routers to terminate accesses to unmapped addresses.
  */
class NastiErrorSlave(implicit p: Parameters) extends NastiModule {
  val io = (new NastiIO).flip

  // log each bad access so simulation failures are diagnosable
  when (io.ar.fire()) { printf("Invalid read address %x\n", io.ar.bits.addr) }
  when (io.aw.fire()) { printf("Invalid write address %x\n", io.aw.bits.addr) }

  // hold the AR beat while the error burst is generated
  val r_queue = Module(new Queue(new NastiReadAddressChannel, 1))
  r_queue.io.enq <> io.ar

  val responding = Reg(init = Bool(false))
  val beats_left = Reg(init = UInt(0, nastiXLenBits))

  when (!responding && r_queue.io.deq.valid) {
    responding := Bool(true)
    beats_left := r_queue.io.deq.bits.len
  }

  io.r.valid := r_queue.io.deq.valid && responding
  io.r.bits.id := r_queue.io.deq.bits.id
  io.r.bits.data := UInt(0)
  io.r.bits.resp := RESP_DECERR
  io.r.bits.last := beats_left === UInt(0)

  r_queue.io.deq.ready := io.r.fire() && io.r.bits.last

  // count down the burst; release the queue entry after the last beat
  when (io.r.fire()) {
    when (beats_left === UInt(0)) {
      responding := Bool(false)
    } .otherwise {
      beats_left := beats_left - UInt(1)
    }
  }

  // after accepting an AW, drain (and discard) the write data burst
  val draining = Reg(init = Bool(false))
  io.w.ready := draining

  when (io.aw.fire()) { draining := Bool(true) }
  when (io.w.fire() && io.w.bits.last) { draining := Bool(false) }

  val b_queue = Module(new Queue(UInt(width = nastiWIdBits), 1))
  b_queue.io.enq.valid := io.aw.valid && !draining
  b_queue.io.enq.bits := io.aw.bits.id
  io.aw.ready := b_queue.io.enq.ready && !draining
  io.b.valid := b_queue.io.deq.valid && !draining
  io.b.bits.id := b_queue.io.deq.bits
  // use the named constant ("b11"), matching the read path, instead of a
  // hard-coded Bits("b11") literal
  io.b.bits.resp := RESP_DECERR
  b_queue.io.deq.ready := io.b.ready && !draining
}
|
||||
|
||||
/** IO for NastiRouter: one master port fanned out to nSlaves slave ports. */
class NastiRouterIO(nSlaves: Int)(implicit p: Parameters) extends Bundle {
  val master = (new NastiIO).flip
  val slave = Vec(nSlaves, new NastiIO)
  override def cloneType =
    new NastiRouterIO(nSlaves).asInstanceOf[this.type]
}
|
||||
|
||||
/** Take a single Nasti master and route its requests to various slaves.
  * Requests whose address matches no slave are diverted to an internal
  * NastiErrorSlave; B and R responses from all slaves (plus the error
  * slave) are merged back with round-robin arbiters.
  *
  * @param nSlaves the number of slaves
  * @param routeSel a function which takes an address and produces
  *        a one-hot encoded selection of the slave to write to
  */
class NastiRouter(nSlaves: Int, routeSel: UInt => UInt)(implicit p: Parameters)
    extends NastiModule {

  val io = new NastiRouterIO(nSlaves)

  val ar_route = routeSel(io.master.ar.bits.addr)
  val aw_route = routeSel(io.master.aw.bits.addr)

  // Scala-level vars accumulate an OR across all slaves while elaborating
  var ar_ready = Bool(false)
  var aw_ready = Bool(false)
  var w_ready = Bool(false)

  io.slave.zipWithIndex.foreach { case (s, i) =>
    s.ar.valid := io.master.ar.valid && ar_route(i)
    s.ar.bits := io.master.ar.bits
    ar_ready = ar_ready || (s.ar.ready && ar_route(i))

    s.aw.valid := io.master.aw.valid && aw_route(i)
    s.aw.bits := io.master.aw.bits
    aw_ready = aw_ready || (s.aw.ready && aw_route(i))

    // per-slave flag: this slave owns the W channel until its last beat
    val chosen = Reg(init = Bool(false))
    when (s.w.fire() && s.w.bits.last) { chosen := Bool(false) }
    when (s.aw.fire()) { chosen := Bool(true) }

    s.w.valid := io.master.w.valid && chosen
    s.w.bits := io.master.w.bits
    w_ready = w_ready || (s.w.ready && chosen)
  }

  // no slave matched: divert the request to the error slave
  val r_invalid = !ar_route.orR
  val w_invalid = !aw_route.orR

  val err_slave = Module(new NastiErrorSlave)
  err_slave.io.ar.valid := r_invalid && io.master.ar.valid
  err_slave.io.ar.bits := io.master.ar.bits
  err_slave.io.aw.valid := w_invalid && io.master.aw.valid
  err_slave.io.aw.bits := io.master.aw.bits
  err_slave.io.w.valid := io.master.w.valid
  err_slave.io.w.bits := io.master.w.bits

  io.master.ar.ready := ar_ready || (r_invalid && err_slave.io.ar.ready)
  io.master.aw.ready := aw_ready || (w_invalid && err_slave.io.aw.ready)
  io.master.w.ready := w_ready || err_slave.io.w.ready

  // merge responses from all slaves plus the error slave
  val b_arb = Module(new RRArbiter(new NastiWriteResponseChannel, nSlaves + 1))
  val r_arb = Module(new JunctionsPeekingArbiter(
    new NastiReadDataChannel, nSlaves + 1,
    // we can unlock if it's the last beat
    (r: NastiReadDataChannel) => r.last))

  for (i <- 0 until nSlaves) {
    b_arb.io.in(i) <> io.slave(i).b
    r_arb.io.in(i) <> io.slave(i).r
  }

  b_arb.io.in(nSlaves) <> err_slave.io.b
  r_arb.io.in(nSlaves) <> err_slave.io.r

  io.master.b <> b_arb.io.out
  io.master.r <> r_arb.io.out
}
|
||||
|
||||
/** Crossbar between multiple Nasti masters and slaves
  * @param nMasters the number of Nasti masters
  * @param nSlaves the number of Nasti slaves
  * @param routeSel a function selecting the slave to route an address to */
class NastiCrossbar(nMasters: Int, nSlaves: Int, routeSel: UInt => UInt)
                   (implicit p: Parameters) extends NastiModule {
  val io = new Bundle {
    val masters = Vec(nMasters, new NastiIO).flip
    val slaves = Vec(nSlaves, new NastiIO)
  }

  if (nMasters == 1) {
    // Single master: no arbitration needed, a router alone suffices.
    val router = Module(new NastiRouter(nSlaves, routeSel))
    router.io.master <> io.masters.head
    io.slaves <> router.io.slave
  } else {
    // One router per master fans requests out to the slaves; one arbiter
    // per slave merges the per-master request streams back together.
    val routers = Vec.fill(nMasters) { Module(new NastiRouter(nSlaves, routeSel)).io }
    val arbiters = Vec.fill(nSlaves) { Module(new NastiArbiter(nMasters)).io }

    for (i <- 0 until nMasters) {
      routers(i).master <> io.masters(i)
    }

    for (i <- 0 until nSlaves) {
      // Arbiter i sees slave port i of every router.
      arbiters(i).master <> Vec(routers.map(r => r.slave(i)))
      io.slaves(i) <> arbiters(i).slave
    }
  }
}
|
||||
|
||||
/** Common IO bundle for Nasti interconnect generators. */
class NastiInterconnectIO(val nMasters: Int, val nSlaves: Int)
                         (implicit p: Parameters) extends Bundle {
  /* This is a bit confusing. The interconnect is a slave to the masters and
   * a master to the slaves. Hence why the declarations seem to be backwards. */
  val masters = Vec(nMasters, new NastiIO).flip
  val slaves = Vec(nSlaves, new NastiIO)
  override def cloneType =
    new NastiInterconnectIO(nMasters, nSlaves).asInstanceOf[this.type]
}
|
||||
|
||||
/** Base class for Nasti interconnects.  Concrete subclasses define the
  * port counts, which is why `io` must be lazy: it reads the abstract
  * members, which are only bound once the subclass body has run. */
abstract class NastiInterconnect(implicit p: Parameters) extends NastiModule()(p) {
  val nMasters: Int
  val nSlaves: Int

  lazy val io = new NastiInterconnectIO(nMasters, nSlaves)
}
|
||||
|
||||
/** Recursively elaborates a crossbar hierarchy from an address map.
  * Each non-empty `AddrMap` sub-map becomes a nested single-master
  * interconnect, empty sub-maps are terminated by an error slave, and
  * leaf `MemRange` entries are exposed directly as slave ports. */
class NastiRecursiveInterconnect(val nMasters: Int, addrMap: AddrMap)
    (implicit p: Parameters) extends NastiInterconnect()(p) {
  // Look up a slave port by the name it carries in the address map.
  def port(name: String) = io.slaves(addrMap.port(name))
  val nSlaves = addrMap.numSlaves
  // One-hot route: bit i is set iff the address lies in entry i's region.
  val routeSel = (addr: UInt) =>
    Cat(addrMap.entries.map(e => addrMap(e.name).containsAddress(addr)).reverse)

  val xbar = Module(new NastiCrossbar(nMasters, addrMap.length, routeSel))
  xbar.io.masters <> io.masters

  // Flatten the per-entry crossbar ports into the final slave list:
  // sub-maps contribute all of their recursively-elaborated ports, leaves
  // contribute themselves, and empty maps contribute nothing (None).
  io.slaves <> addrMap.entries.zip(xbar.io.slaves).flatMap {
    case (entry, xbarSlave) => {
      entry.region match {
        case submap: AddrMap if submap.entries.isEmpty =>
          // Nothing mapped here: terminate with an error slave.
          val err_slave = Module(new NastiErrorSlave)
          err_slave.io <> xbarSlave
          None
        case submap: AddrMap =>
          val ic = Module(new NastiRecursiveInterconnect(1, submap))
          ic.io.masters.head <> xbarSlave
          ic.io.slaves
        case r: MemRange =>
          Some(xbarSlave)
      }
    }
  }
}
|
||||
|
||||
/** Address-slicing helpers for interleaving a flat memory space across
  * `nChannels` memory channels.  The channel-select bits sit directly
  * above the intra-block byte offset, so consecutive blocks map to
  * different channels. */
class ChannelHelper(nChannels: Int)
    (implicit val p: Parameters) extends HasNastiParameters {

  // Bytes per memory block (one full data burst).
  val dataBytes = p(MIFDataBits) * p(MIFDataBeats) / 8
  val chanSelBits = log2Ceil(nChannels)
  val selOffset = log2Up(dataBytes)
  val blockOffset = selOffset + chanSelBits

  /** Channel index for `addr` (always 0 with a single channel). */
  def getSelect(addr: UInt) =
    if (nChannels > 1) addr(blockOffset - 1, selOffset) else UInt(0)

  /** `addr` with the channel-select bits spliced out. */
  def getAddr(addr: UInt) =
    if (nChannels > 1)
      Cat(addr(nastiXAddrBits - 1, blockOffset), addr(selOffset - 1, 0))
    else addr
}
|
||||
|
||||
/** Connects `nBanksPerChannel * nChannels` bank masters to `nChannels`
  * memory channels, arbitrating the banks assigned to each channel and
  * rewriting request addresses to strip the channel-select bits. */
class NastiMemoryInterconnect(
    nBanksPerChannel: Int, nChannels: Int)
    (implicit p: Parameters) extends NastiInterconnect()(p) {

  val nBanks = nBanksPerChannel * nChannels
  val nMasters = nBanks
  val nSlaves = nChannels

  val chanHelper = new ChannelHelper(nChannels)
  // Bulk-connect everything, then override the request addresses with the
  // channel-select bits removed (the later `:=` wins in Chisel).
  def connectChannel(outer: NastiIO, inner: NastiIO) {
    outer <> inner
    outer.ar.bits.addr := chanHelper.getAddr(inner.ar.bits.addr)
    outer.aw.bits.addr := chanHelper.getAddr(inner.aw.bits.addr)
  }

  for (i <- 0 until nChannels) {
    /* Bank assignments to channels are strided so that consecutive banks
     * map to different channels. That way, consecutive cache lines also
     * map to different channels */
    val banks = (i until nBanks by nChannels).map(j => io.masters(j))

    val channelArb = Module(new NastiArbiter(nBanksPerChannel))
    channelArb.io.master <> banks
    connectChannel(io.slaves(i), channelArb.io.slave)
  }
}
|
||||
|
||||
/** Allows users to switch between various memory configurations. Note that
  * this is a dangerous operation: not only does switching the select input to
  * this module violate Nasti, it also causes the memory of the machine to
  * become garbled. It's expected that select only changes at boot time, as
  * part of the memory controller configuration. */
class NastiMemorySelectorIO(val nBanks: Int, val maxMemChannels: Int, nConfigs: Int)
                           (implicit p: Parameters)
    extends NastiInterconnectIO(nBanks, maxMemChannels) {
  // Chooses which of the nConfigs channel layouts is currently active.
  val select = UInt(INPUT, width = log2Up(nConfigs))
  override def cloneType =
    new NastiMemorySelectorIO(nMasters, nSlaves, nConfigs).asInstanceOf[this.type]
}
|
||||
|
||||
/** Switches between several memory-channel layouts at run time; see the
  * warning on NastiMemorySelectorIO (`select` must only change at boot).
  * One NastiMemoryInterconnect is elaborated per entry of `configs`, all
  * ports are first tied off to safe defaults, and the active layout is
  * then muxed onto the shared ports via last-connection-wins overrides. */
class NastiMemorySelector(nBanks: Int, maxMemChannels: Int, configs: Seq[Int])
                         (implicit p: Parameters)
    extends NastiInterconnect()(p) {
  val nMasters = nBanks
  val nSlaves = maxMemChannels
  val nConfigs = configs.size

  override lazy val io = new NastiMemorySelectorIO(nBanks, maxMemChannels, nConfigs)

  // Drives `dn` from `up` only while `active`, relying on Chisel's
  // last-connection-wins semantics to override the default disconnects
  // installed further below.
  def muxOnSelect(up: DecoupledIO[Bundle], dn: DecoupledIO[Bundle], active: Bool): Unit = {
    when (active) { dn.bits := up.bits }
    when (active) { up.ready := dn.ready }
    when (active) { dn.valid := up.valid }
  }

  // Request channels (AW/W/AR) flow up->dn; responses (B/R) flow dn->up.
  def muxOnSelect(up: NastiIO, dn: NastiIO, active: Bool): Unit = {
    muxOnSelect(up.aw, dn.aw, active)
    muxOnSelect(up.w, dn.w, active)
    muxOnSelect(dn.b, up.b, active)
    muxOnSelect(up.ar, dn.ar, active)
    muxOnSelect(dn.r, up.r, active)
  }

  def muxOnSelect(up: Vec[NastiIO], dn: Vec[NastiIO], active: Bool) : Unit = {
    for (i <- 0 until up.size)
      muxOnSelect(up(i), dn(i), active)
  }

  /* Disconnects a vector of Nasti ports, which involves setting them to
   * invalid. Due to Chisel reasons, we need to also set the bits to 0 (since
   * there can't be any unconnected inputs). */
  def disconnectSlave(slave: Vec[NastiIO]) = {
    slave.foreach{ m =>
      m.aw.valid := Bool(false)
      m.aw.bits := m.aw.bits.fromBits( UInt(0) )
      m.w.valid := Bool(false)
      m.w.bits := m.w.bits.fromBits( UInt(0) )
      m.b.ready := Bool(false)
      m.ar.valid := Bool(false)
      m.ar.bits := m.ar.bits.fromBits( UInt(0) )
      m.r.ready := Bool(false)
    }
  }

  // Counterpart of disconnectSlave for the master-facing direction.
  def disconnectMaster(master: Vec[NastiIO]) = {
    master.foreach{ m =>
      m.aw.ready := Bool(false)
      m.w.ready := Bool(false)
      m.b.valid := Bool(false)
      m.b.bits := m.b.bits.fromBits( UInt(0) )
      m.ar.ready := Bool(false)
      m.r.valid := Bool(false)
      m.r.bits := m.r.bits.fromBits( UInt(0) )
    }
  }

  /* Provides default wires on all our outputs. */
  disconnectMaster(io.masters)
  disconnectSlave(io.slaves)

  /* Constructs interconnects for each of the layouts suggested by the
   * configuration and switches between them based on the select input. */
  configs.zipWithIndex.foreach{ case (nChannels, select) =>
    val nBanksPerChannel = nBanks / nChannels
    val ic = Module(new NastiMemoryInterconnect(nBanksPerChannel, nChannels))
    disconnectMaster(ic.io.slaves)
    disconnectSlave(ic.io.masters)
    muxOnSelect( io.masters, ic.io.masters, io.select === UInt(select))
    muxOnSelect(ic.io.slaves, io.slaves, io.select === UInt(select))
  }
}
|
||||
|
||||
/** Routes one master to one of `nRoutes` slaves based on an external
  * `select` input rather than the request address.  As with
  * NastiMemorySelector, changing `select` mid-transaction is unsafe. */
class NastiMemoryDemux(nRoutes: Int)(implicit p: Parameters) extends NastiModule()(p) {
  val io = new Bundle {
    val master = (new NastiIO).flip
    val slaves = Vec(nRoutes, new NastiIO)
    val select = UInt(INPUT, log2Up(nRoutes))
  }

  // Forwards a request channel only toward the selected slave.
  def connectReqChannel[T <: Data](idx: Int, out: DecoupledIO[T], in: DecoupledIO[T]) {
    out.valid := in.valid && io.select === UInt(idx)
    out.bits := in.bits
    when (io.select === UInt(idx)) { in.ready := out.ready }
  }

  // Forwards a response channel only from the selected slave.
  def connectRespChannel[T <: Data](idx: Int, out: DecoupledIO[T], in: DecoupledIO[T]) {
    when (io.select === UInt(idx)) { out.valid := in.valid }
    when (io.select === UInt(idx)) { out.bits := in.bits }
    in.ready := out.ready && io.select === UInt(idx)
  }

  // Safe defaults on the master side; the per-slave connections below
  // override these for the selected route (last connection wins).
  io.master.ar.ready := Bool(false)
  io.master.aw.ready := Bool(false)
  io.master.w.ready := Bool(false)
  io.master.r.valid := Bool(false)
  io.master.r.bits := NastiReadDataChannel(id = UInt(0), data = UInt(0))
  io.master.b.valid := Bool(false)
  io.master.b.bits := NastiWriteResponseChannel(id = UInt(0))

  io.slaves.zipWithIndex.foreach { case (slave, i) =>
    connectReqChannel(i, slave.ar, io.master.ar)
    connectReqChannel(i, slave.aw, io.master.aw)
    connectReqChannel(i, slave.w, io.master.w)
    connectRespChannel(i, io.master.r, slave.r)
    connectRespChannel(i, io.master.b, slave.b)
  }
}
|
||||
|
||||
object AsyncNastiTo {
  /** Crosses a Nasti master interface into the `to` clock domain.
    * `source` lives in the current implicit clock domain; the returned
    * wire lives in the `to_clock` domain. */
  def apply[T <: Data](to_clock: Clock, to_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2)(implicit p: Parameters): NastiIO = {
    val crossed = Wire(new NastiIO)

    // Request channels cross into the 'to' domain...
    crossed.ar <> AsyncDecoupledTo(to_clock, to_reset, source.ar, depth, sync)
    crossed.aw <> AsyncDecoupledTo(to_clock, to_reset, source.aw, depth, sync)
    crossed.w <> AsyncDecoupledTo(to_clock, to_reset, source.w, depth, sync)

    // ...while response channels cross back into ours.
    source.r <> AsyncDecoupledFrom(to_clock, to_reset, crossed.r, depth, sync)
    source.b <> AsyncDecoupledFrom(to_clock, to_reset, crossed.b, depth, sync)

    crossed
  }
}
|
||||
|
||||
object AsyncNastiFrom {
  /** Crosses a Nasti master interface out of the `from` clock domain.
    * `source` lives in the `from_clock` domain; the returned wire lives
    * in the current implicit clock domain. */
  def apply[T <: Data](from_clock: Clock, from_reset: Bool, source: NastiIO, depth: Int = 3, sync: Int = 2)(implicit p: Parameters): NastiIO = {
    val crossed = Wire(new NastiIO)

    // Request channels come out of the 'from' domain into ours...
    crossed.ar <> AsyncDecoupledFrom(from_clock, from_reset, source.ar, depth, sync)
    crossed.aw <> AsyncDecoupledFrom(from_clock, from_reset, source.aw, depth, sync)
    crossed.w <> AsyncDecoupledFrom(from_clock, from_reset, source.w, depth, sync)

    // ...while response channels cross back into the 'from' domain.
    source.r <> AsyncDecoupledTo(from_clock, from_reset, crossed.r, depth, sync)
    source.b <> AsyncDecoupledTo(from_clock, from_reset, crossed.b, depth, sync)

    crossed
  }
}
|
||||
1
src/main/scala/junctions/package.scala
Normal file
1
src/main/scala/junctions/package.scala
Normal file
@@ -0,0 +1 @@
|
||||
package object junctions
|
||||
82
src/main/scala/junctions/poci.scala
Normal file
82
src/main/scala/junctions/poci.scala
Normal file
@@ -0,0 +1,82 @@
|
||||
package junctions
|
||||
|
||||
import Chisel._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
/** POCI master port (APB-like signal set: paddr/psel/penable/pready).
  * Widths are borrowed from the Hasti parameters since this port is
  * paired with a Hasti slave by HastiToPociBridge below. */
class PociIO(implicit p: Parameters) extends HastiBundle()(p)
{
  val paddr = UInt(OUTPUT, hastiAddrBits)   // transfer address
  val pwrite = Bool(OUTPUT)                 // high for writes
  val psel = Bool(OUTPUT)                   // slave select
  val penable = Bool(OUTPUT)                // access-phase enable
  val pwdata = UInt(OUTPUT, hastiDataBits)  // write data
  val prdata = UInt(INPUT, hastiDataBits)   // read data
  val pready = Bool(INPUT)                  // slave ready (wait states)
  val pslverr = Bool(INPUT)                 // slave error response
}
|
||||
|
||||
/** Bridges a Hasti (AHB-lite-style) slave port onto a POCI master port,
  * sequencing each transfer through idle/setup/access states. */
class HastiToPociBridge(implicit p: Parameters) extends HastiModule()(p) {
  val io = new Bundle {
    val in = new HastiSlaveIO
    val out = new PociIO
  }

  val s_idle :: s_setup :: s_access :: Nil = Enum(UInt(), 3)
  val state = Reg(init = s_idle)
  // A transfer is requested when selected with a non-idle htrans.
  val transfer = io.in.hsel & io.in.htrans(1)

  switch (state) {
    is (s_idle) {
      when (transfer) { state := s_setup }
    }
    is (s_setup) {
      state := s_access
    }
    is (s_access) {
      // Back-to-back transfers return to setup; otherwise go idle.
      // The three conditions are mutually exclusive.
      when (io.out.pready & ~transfer) { state := s_idle }
      when (io.out.pready & transfer) { state := s_setup }
      when (~io.out.pready) { state := s_access }
    }
  }

  // Latch address-phase signals so they stay stable through the access.
  val haddr_reg = Reg(UInt(width = hastiAddrBits))
  val hwrite_reg = Reg(UInt(width = 1))
  when (transfer) {
    haddr_reg := io.in.haddr
    hwrite_reg := io.in.hwrite
  }

  io.out.paddr := haddr_reg
  io.out.pwrite := hwrite_reg(0)
  io.out.psel := (state =/= s_idle)
  io.out.penable := (state === s_access)
  io.out.pwdata := io.in.hwdata
  io.in.hrdata := io.out.prdata
  // Insert wait states only while the POCI access is in flight.
  io.in.hready := ((state === s_access) & io.out.pready) | (state === s_idle)
  io.in.hresp := io.out.pslverr
}
|
||||
|
||||
/** Single-master POCI bus: decodes `paddr` against the per-slave match
  * functions in `amap`, asserts at most one psel (priority order via
  * PriorityEncoderOH), and muxes the selected slave's response back. */
class PociBus(amap: Seq[UInt=>Bool])(implicit p: Parameters) extends HastiModule()(p)
{
  val io = new Bundle {
    val master = new PociIO().flip
    val slaves = Vec(amap.size, new PociIO)
  }

  // The map closure both broadcasts address/write/data to every slave
  // (side effect) and yields the raw match bit for the priority encoder.
  val psels = PriorityEncoderOH(
    (io.slaves zip amap) map { case (s, afn) => {
      s.paddr := io.master.paddr
      s.pwrite := io.master.pwrite
      s.pwdata := io.master.pwdata
      afn(io.master.paddr) && io.master.psel
    }})

  (io.slaves zip psels) foreach { case (s, psel) => {
    s.psel := psel
    s.penable := io.master.penable && psel
  } }

  // Response mux: exactly the selected slave's signals reach the master.
  io.master.prdata := Mux1H(psels, io.slaves.map(_.prdata))
  io.master.pready := Mux1H(psels, io.slaves.map(_.pready))
  io.master.pslverr := Mux1H(psels, io.slaves.map(_.pslverr))
}
|
||||
70
src/main/scala/junctions/slowio.scala
Normal file
70
src/main/scala/junctions/slowio.scala
Normal file
@@ -0,0 +1,70 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package junctions
|
||||
import Chisel._
|
||||
|
||||
// See LICENSE for license details.

/** Moves Decoupled data between the fast core clock domain and a divided
  * "slow" domain whose clock is generated on `clk_slow`.  The divisor and
  * hold time are run-time programmable via `set_divisor`
  * (bits [15:0] = divisor shadow, bits [31:16] = hold shadow); new values
  * take effect at the next falling edge of the generated slow clock. */
class SlowIO[T <: Data](val divisor_max: Int)(data: => T) extends Module
{
  val io = new Bundle {
    val out_fast = Decoupled(data).flip              // fast-domain data heading out slowly
    val out_slow = Decoupled(data)                   // slow-domain output
    val in_fast = Decoupled(data)                    // fast-domain data arriving
    val in_slow = Decoupled(data).flip               // slow-domain input
    val clk_slow = Bool(OUTPUT)                      // generated slow clock
    val set_divisor = Valid(Bits(width = 32)).flip   // programs divisor/hold shadows
    val divisor = Bits(OUTPUT, 32)                   // current {hold, divisor} readback
  }

  require(divisor_max >= 8 && divisor_max <= 65536 && isPow2(divisor_max))
  // Shadow registers: programmed values are latched here and only promoted
  // to the live divisor/hold at the slow clock's falling edge.
  val divisor = Reg(init=UInt(divisor_max-1))
  val d_shadow = Reg(init=UInt(divisor_max-1))
  val hold = Reg(init=UInt(divisor_max/4-1))
  val h_shadow = Reg(init=UInt(divisor_max/4-1))
  when (io.set_divisor.valid) {
    d_shadow := io.set_divisor.bits(log2Up(divisor_max)-1, 0)
    h_shadow := io.set_divisor.bits(log2Up(divisor_max)-1+16, 16)
  }
  io.divisor := (hold << 16) | divisor

  val count = Reg{UInt(width = log2Up(divisor_max))}
  val myclock = Reg{Bool()}
  count := count + UInt(1)

  // Edges of the generated slow clock; `held` marks a sample point `hold`
  // fast cycles after the rising edge.
  val rising = count === (divisor >> 1)
  val falling = count === divisor
  val held = count === (divisor >> 1) + hold

  when (falling) {
    // Falling edge: promote shadows and restart the divide counter.
    divisor := d_shadow
    hold := h_shadow
    count := UInt(0)
    myclock := Bool(false)
  }
  when (rising) {
    myclock := Bool(true)
  }

  val in_slow_rdy = Reg(init=Bool(false))
  val out_slow_val = Reg(init=Bool(false))
  val out_slow_bits = Reg(data)

  // Single-entry queues decouple the fast side from the slow handshakes.
  val fromhost_q = Module(new Queue(data,1))
  fromhost_q.io.enq.valid := rising && (io.in_slow.valid && in_slow_rdy || this.reset)
  fromhost_q.io.enq.bits := io.in_slow.bits
  io.in_fast <> fromhost_q.io.deq

  val tohost_q = Module(new Queue(data,1))
  tohost_q.io.enq <> io.out_fast
  tohost_q.io.deq.ready := rising && io.out_slow.ready && out_slow_val

  // Sample slow-side handshake state at the held point so the slow
  // signals are stable across the slow clock period.
  when (held) {
    in_slow_rdy := fromhost_q.io.enq.ready
    out_slow_val := tohost_q.io.deq.valid
    out_slow_bits := Mux(this.reset, fromhost_q.io.deq.bits, tohost_q.io.deq.bits)
  }

  io.in_slow.ready := in_slow_rdy
  io.out_slow.valid := out_slow_val
  io.out_slow.bits := out_slow_bits
  io.clk_slow := myclock
}
|
||||
281
src/main/scala/junctions/smi.scala
Normal file
281
src/main/scala/junctions/smi.scala
Normal file
@@ -0,0 +1,281 @@
|
||||
package junctions
|
||||
|
||||
import Chisel._
|
||||
import cde.Parameters
|
||||
|
||||
/** A single Smi request; `rw` selects write (true) or read (false). */
class SmiReq(val dataWidth: Int, val addrWidth: Int) extends Bundle {
  val rw = Bool()
  val addr = UInt(width = addrWidth)
  val data = Bits(width = dataWidth)  // write payload

  override def cloneType =
    new SmiReq(dataWidth, addrWidth).asInstanceOf[this.type]
}
|
||||
|
||||
/** Simple Memory Interface IO. Used to communicate with PCR and SCR
  * @param dataWidth the width in bits of the data field
  * @param addrWidth the width in bits of the addr field */
class SmiIO(val dataWidth: Int, val addrWidth: Int) extends Bundle {
  val req = Decoupled(new SmiReq(dataWidth, addrWidth))
  val resp = Decoupled(Bits(width = dataWidth)).flip

  override def cloneType =
    new SmiIO(dataWidth, addrWidth).asInstanceOf[this.type]
}
|
||||
|
||||
/** Base class for modules exposing an Smi slave port.  `io` is lazy so
  * that subclasses can bind dataWidth/addrWidth before it elaborates. */
abstract class SmiPeripheral extends Module {
  val dataWidth: Int
  val addrWidth: Int

  lazy val io = new SmiIO(dataWidth, addrWidth).flip
}
|
||||
|
||||
/** A simple sequential memory accessed through Smi */
class SmiMem(val dataWidth: Int, val memDepth: Int) extends SmiPeripheral {
  // override
  val addrWidth = log2Up(memDepth)

  val mem = SeqMem(memDepth, Bits(width = dataWidth))

  val ren = io.req.fire() && !io.req.bits.rw
  val wen = io.req.fire() && io.req.bits.rw

  when (wen) { mem.write(io.req.bits.addr, io.req.bits.data) }

  // One request outstanding at a time: resp_valid blocks new requests
  // until the previous response has been accepted.
  val resp_valid = Reg(init = Bool(false))

  when (io.resp.fire()) { resp_valid := Bool(false) }
  when (io.req.fire()) { resp_valid := Bool(true) }

  io.resp.valid := resp_valid
  io.resp.bits := mem.read(io.req.bits.addr, ren)
  io.req.ready := !resp_valid
}
|
||||
|
||||
/** Arbitrate among several Smi clients
  * @param n the number of clients
  * @param dataWidth Smi data width
  * @param addrWidth Smi address width */
class SmiArbiter(val n: Int, val dataWidth: Int, val addrWidth: Int)
    extends Module {
  val io = new Bundle {
    val in = Vec(n, new SmiIO(dataWidth, addrWidth)).flip
    val out = new SmiIO(dataWidth, addrWidth)
  }

  // Only one request may be in flight; `choice` remembers which client it
  // came from so the response can be routed back to it.
  val wait_resp = Reg(init = Bool(false))
  val choice = Reg(UInt(width = log2Up(n)))

  val req_arb = Module(new RRArbiter(new SmiReq(dataWidth, addrWidth), n))
  req_arb.io.in <> io.in.map(_.req)
  req_arb.io.out.ready := io.out.req.ready && !wait_resp

  io.out.req.bits := req_arb.io.out.bits
  io.out.req.valid := req_arb.io.out.valid && !wait_resp

  when (io.out.req.fire()) {
    choice := req_arb.io.chosen
    wait_resp := Bool(true)
  }

  when (io.out.resp.fire()) { wait_resp := Bool(false) }

  // Broadcast response data; only the chosen client sees valid.
  for ((resp, i) <- io.in.map(_.resp).zipWithIndex) {
    resp.bits := io.out.resp.bits
    resp.valid := io.out.resp.valid && choice === UInt(i)
  }

  io.out.resp.ready := io.in(choice).resp.ready
}
|
||||
|
||||
/** Converts Nasti read channels (AR/R) into Smi read requests.  Each
  * Nasti R beat is assembled from up to `maxWordsPerBeat` Smi word reads
  * collected in `buffer`, then emitted as a single beat. */
class SmiIONastiReadIOConverter(val dataWidth: Int, val addrWidth: Int)
                               (implicit p: Parameters) extends NastiModule()(p) {
  val io = new Bundle {
    val nasti = new NastiReadIO().flip
    val smi = new SmiIO(dataWidth, addrWidth)
  }

  private val maxWordsPerBeat = nastiXDataBits / dataWidth
  private val wordCountBits = log2Up(maxWordsPerBeat)
  private val byteOffBits = log2Up(dataWidth / 8)
  private val addrOffBits = addrWidth + byteOffBits

  // Smi words needed (minus one) for a Nasti transfer of the given size.
  private def calcWordCount(size: UInt): UInt =
    (UInt(1) << (size - UInt(byteOffBits))) - UInt(1)

  val (s_idle :: s_read :: s_resp :: Nil) = Enum(Bits(), 3)
  val state = Reg(init = s_idle)

  val nWords = Reg(UInt(width = wordCountBits))  // Smi words left in this beat
  val nBeats = Reg(UInt(width = nastiXLenBits))  // Nasti beats left in the burst
  val addr = Reg(UInt(width = addrWidth))
  val id = Reg(UInt(width = nastiRIdBits))

  val byteOff = Reg(UInt(width = byteOffBits))
  val recvInd = Reg(init = UInt(0, wordCountBits))  // next buffer slot to fill
  val sendDone = Reg(init = Bool(false))            // all Smi requests issued

  val buffer = Reg(init = Vec.fill(maxWordsPerBeat) { Bits(0, dataWidth) })

  io.nasti.ar.ready := (state === s_idle)

  io.smi.req.valid := (state === s_read) && !sendDone
  io.smi.req.bits.rw := Bool(false)
  io.smi.req.bits.addr := addr

  io.smi.resp.ready := (state === s_read)

  io.nasti.r.valid := (state === s_resp)
  io.nasti.r.bits := NastiReadDataChannel(
    id = id,
    data = buffer.asUInt,
    last = (nBeats === UInt(0)))

  when (io.nasti.ar.fire()) {
    // Transfers narrower than one Smi word still take a single request.
    when (io.nasti.ar.bits.size < UInt(byteOffBits)) {
      nWords := UInt(0)
    } .otherwise {
      nWords := calcWordCount(io.nasti.ar.bits.size)
    }
    nBeats := io.nasti.ar.bits.len
    addr := io.nasti.ar.bits.addr(addrOffBits - 1, byteOffBits)
    if (maxWordsPerBeat > 1)
      recvInd := io.nasti.ar.bits.addr(wordCountBits + byteOffBits - 1, byteOffBits)
    else
      recvInd := UInt(0)
    id := io.nasti.ar.bits.id
    state := s_read
  }

  when (io.smi.req.fire()) {
    addr := addr + UInt(1)
    sendDone := (nWords === UInt(0))
  }

  when (io.smi.resp.fire()) {
    recvInd := recvInd + UInt(1)
    nWords := nWords - UInt(1)
    buffer(recvInd) := io.smi.resp.bits
    when (nWords === UInt(0)) { state := s_resp }
  }

  when (io.nasti.r.fire()) {
    recvInd := UInt(0)
    sendDone := Bool(false)
    // clear all the registers in the buffer
    buffer.foreach(_ := Bits(0))
    nBeats := nBeats - UInt(1)
    state := Mux(io.nasti.r.bits.last, s_idle, s_read)
  }
}
|
||||
|
||||
/** Converts Nasti write channels (AW/W/B) into Smi write requests.  Each
  * Nasti W beat is scattered into per-word Smi writes, skipping words
  * whose strobe bits are clear, then acknowledged with a single B. */
class SmiIONastiWriteIOConverter(val dataWidth: Int, val addrWidth: Int)
                                (implicit p: Parameters) extends NastiModule()(p) {
  val io = new Bundle {
    val nasti = new NastiWriteIO().flip
    val smi = new SmiIO(dataWidth, addrWidth)
  }

  private val dataBytes = dataWidth / 8
  private val maxWordsPerBeat = nastiXDataBits / dataWidth
  private val byteOffBits = log2Floor(dataBytes)
  private val addrOffBits = addrWidth + byteOffBits
  private val nastiByteOffBits = log2Ceil(nastiXDataBits / 8)

  assert(!io.nasti.aw.valid || io.nasti.aw.bits.size >= UInt(byteOffBits),
    "Nasti size must be >= Smi size")

  val id = Reg(UInt(width = nastiWIdBits))
  val addr = Reg(UInt(width = addrWidth))
  val offset = Reg(UInt(width = nastiByteOffBits))

  // Per-Smi-word write mask: a word is written iff its first byte is
  // strobed and lies inside the addressed `size` window.
  def makeStrobe(offset: UInt, size: UInt, strb: UInt) = {
    val sizemask = (UInt(1) << (UInt(1) << size)) - UInt(1)
    val bytemask = strb & (sizemask << offset)
    Vec.tabulate(maxWordsPerBeat){i => bytemask(dataBytes * i)}.asUInt
  }

  val size = Reg(UInt(width = nastiXSizeBits))
  val strb = Reg(UInt(width = maxWordsPerBeat))
  val data = Reg(UInt(width = nastiXDataBits))
  val last = Reg(Bool())

  val s_idle :: s_data :: s_send :: s_ack :: s_resp :: Nil = Enum(Bits(), 5)
  val state = Reg(init = s_idle)

  io.nasti.aw.ready := (state === s_idle)
  io.nasti.w.ready := (state === s_data)
  io.smi.req.valid := (state === s_send) && strb(0)
  io.smi.req.bits.rw := Bool(true)
  io.smi.req.bits.addr := addr
  io.smi.req.bits.data := data(dataWidth - 1, 0)
  io.smi.resp.ready := (state === s_ack)
  io.nasti.b.valid := (state === s_resp)
  io.nasti.b.bits := NastiWriteResponseChannel(id)

  // Distance to the next strobed word, so unstrobed words are skipped in
  // a single cycle rather than one at a time.
  val jump = if (maxWordsPerBeat > 1)
    PriorityMux(strb(maxWordsPerBeat - 1, 1),
      (1 until maxWordsPerBeat).map(UInt(_)))
    else UInt(1)

  when (io.nasti.aw.fire()) {
    if (dataWidth == nastiXDataBits) {
      addr := io.nasti.aw.bits.addr(addrOffBits - 1, byteOffBits)
    } else {
      // Align to the start of the full Nasti beat; makeStrobe masks off
      // the words below the requested offset.
      addr := Cat(io.nasti.aw.bits.addr(addrOffBits - 1, nastiByteOffBits),
        UInt(0, nastiByteOffBits - byteOffBits))
    }
    offset := io.nasti.aw.bits.addr(nastiByteOffBits - 1, 0)
    id := io.nasti.aw.bits.id
    size := io.nasti.aw.bits.size
    last := Bool(false)
    state := s_data
  }

  when (io.nasti.w.fire()) {
    last := io.nasti.w.bits.last
    strb := makeStrobe(offset, size, io.nasti.w.bits.strb)
    data := io.nasti.w.bits.data
    state := s_send
  }

  when (state === s_send) {
    // Advance past the current word once it is accepted (or was never
    // strobed); wait for the ack after issuing the final strobed word.
    when (io.smi.req.ready || !strb(0)) {
      strb := strb >> jump
      data := data >> Cat(jump, UInt(0, log2Up(dataWidth)))
      addr := addr + jump
      when (strb(0)) { state := s_ack }
    }
  }

  when (io.smi.resp.fire()) {
    state := Mux(strb === UInt(0),
      Mux(last, s_resp, s_data), s_send)
  }

  when (io.nasti.b.fire()) { state := s_idle }
}
|
||||
|
||||
/** Convert Nasti protocol to Smi protocol */
class SmiIONastiIOConverter(val dataWidth: Int, val addrWidth: Int)
                           (implicit p: Parameters) extends NastiModule()(p) {
  val io = new Bundle {
    val nasti = (new NastiIO).flip
    val smi = new SmiIO(dataWidth, addrWidth)
  }

  require(isPow2(dataWidth), "SMI data width must be power of 2")
  require(dataWidth <= nastiXDataBits,
    "SMI data width must be less than or equal to NASTI data width")

  // Reads and writes are handled by independent converters...
  val reader = Module(new SmiIONastiReadIOConverter(dataWidth, addrWidth))
  reader.io.nasti <> io.nasti

  val writer = Module(new SmiIONastiWriteIOConverter(dataWidth, addrWidth))
  writer.io.nasti <> io.nasti

  // ...whose requests are arbitrated onto the single Smi port.
  val arb = Module(new SmiArbiter(2, dataWidth, addrWidth))
  arb.io.in(0) <> reader.io.smi
  arb.io.in(1) <> writer.io.smi
  io.smi <> arb.io.out
}
|
||||
187
src/main/scala/junctions/stream.scala
Normal file
187
src/main/scala/junctions/stream.scala
Normal file
@@ -0,0 +1,187 @@
|
||||
package junctions
|
||||
|
||||
import Chisel._
|
||||
import NastiConstants._
|
||||
import cde.Parameters
|
||||
|
||||
/** One beat of a simple data/last stream, `w` bits wide. */
class StreamChannel(w: Int) extends Bundle {
  val data = UInt(width = w)
  val last = Bool()  // marks the final beat of a packet

  override def cloneType = new StreamChannel(w).asInstanceOf[this.type]
}
|
||||
|
||||
/** Bidirectional pair of `w`-bit stream channels. */
class StreamIO(w: Int) extends Bundle {
  val out = Decoupled(new StreamChannel(w))
  val in = Decoupled(new StreamChannel(w)).flip

  override def cloneType = new StreamIO(w).asInstanceOf[this.type]
}
|
||||
|
||||
/** Exposes a Nasti slave port as a pair of streams: reads drain
  * `stream.in`, writes feed `stream.out`.  Only fixed-type bursts of the
  * stream's natural size with full write strobes are legal (asserted). */
class NastiIOStreamIOConverter(w: Int)(implicit p: Parameters) extends Module {
  val io = new Bundle {
    val nasti = (new NastiIO).flip
    val stream = new StreamIO(w)
  }

  val streamSize = UInt(log2Up(w / 8))
  assert(!io.nasti.ar.valid || io.nasti.ar.bits.size === streamSize,
    "read channel wrong size on stream")
  assert(!io.nasti.ar.valid || io.nasti.ar.bits.len === UInt(0) ||
    io.nasti.ar.bits.burst === BURST_FIXED,
    "read channel wrong burst type on stream")
  assert(!io.nasti.aw.valid || io.nasti.aw.bits.size === streamSize,
    "write channel wrong size on stream")
  assert(!io.nasti.aw.valid || io.nasti.aw.bits.len === UInt(0) ||
    io.nasti.aw.bits.burst === BURST_FIXED,
    "write channel wrong burst type on stream")
  assert(!io.nasti.w.valid || io.nasti.w.bits.strb.andR,
    "write channel cannot take partial writes")

  // Read side: one burst at a time, counting down read_cnt beats.
  val read_id = Reg(io.nasti.ar.bits.id)
  val read_cnt = Reg(io.nasti.ar.bits.len)
  val reading = Reg(init = Bool(false))

  io.nasti.ar.ready := !reading
  io.nasti.r.valid := reading && io.stream.in.valid
  io.nasti.r.bits := io.stream.in.bits
  io.nasti.r.bits.resp := UInt(0)
  io.nasti.r.bits.id := read_id
  io.stream.in.ready := reading && io.nasti.r.ready

  when (io.nasti.ar.fire()) {
    read_id := io.nasti.ar.bits.id
    read_cnt := io.nasti.ar.bits.len
    reading := Bool(true)
  }

  when (io.nasti.r.fire()) {
    when (read_cnt === UInt(0)) {
      reading := Bool(false)
    } .otherwise {
      read_cnt := read_cnt - UInt(1)
    }
  }

  // Write side: stream out all W beats, then issue a single B response.
  val write_id = Reg(io.nasti.aw.bits.id)
  val writing = Reg(init = Bool(false))
  val write_resp = Reg(init = Bool(false))

  io.nasti.aw.ready := !writing && !write_resp
  io.nasti.w.ready := writing && io.stream.out.ready
  io.stream.out.valid := writing && io.nasti.w.valid
  io.stream.out.bits := io.nasti.w.bits
  io.nasti.b.valid := write_resp
  io.nasti.b.bits.resp := UInt(0)
  io.nasti.b.bits.id := write_id

  when (io.nasti.aw.fire()) {
    write_id := io.nasti.aw.bits.id
    writing := Bool(true)
  }

  when (io.nasti.w.fire() && io.nasti.w.bits.last) {
    writing := Bool(false)
    write_resp := Bool(true)
  }

  when (io.nasti.b.fire()) { write_resp := Bool(false) }
}
|
||||
|
||||
/** Splits each `win`-bit input beat into `win / wout` consecutive
  * `wout`-bit output beats, least-significant piece first.  `last` is
  * asserted only on the final piece of an input beat whose `last` was
  * set.  Accepts a new input beat only once the previous one has been
  * fully emitted. */
class StreamNarrower(win: Int, wout: Int) extends Module {
  // Fixed: the first message previously read "larger than input width",
  // which named the wrong operand of the comparison.
  require(win > wout, "Stream narrower input width must be larger than output width")
  require(win % wout == 0, "Stream narrower input width must be multiple of output width")

  val io = new Bundle {
    val in = Decoupled(new StreamChannel(win)).flip
    val out = Decoupled(new StreamChannel(wout))
  }

  val n_pieces = win / wout
  val buffer = Reg(Bits(width = win))
  // piece_idx counts emitted pieces; pkt_done pulses after the last one.
  val (piece_idx, pkt_done) = Counter(io.out.fire(), n_pieces)
  val pieces = Vec.tabulate(n_pieces) { i => buffer(wout * (i + 1) - 1, wout * i) }
  val last_piece = (piece_idx === UInt(n_pieces - 1))
  val sending = Reg(init = Bool(false))
  val in_last = Reg(Bool())

  when (io.in.fire()) {
    buffer := io.in.bits.data
    in_last := io.in.bits.last
    sending := Bool(true)
  }
  when (pkt_done) { sending := Bool(false) }

  io.out.valid := sending
  io.out.bits.data := pieces(piece_idx)
  io.out.bits.last := in_last && last_piece
  io.in.ready := !sending
}
|
||||
|
||||
/** Collects `wout / win` input words of width `win` into one output word of
  * width `wout` (first word lands in the least-significant position). The
  * output `last` reflects the `last` flag of the final collected word. */
class StreamExpander(win: Int, wout: Int) extends Module {
  // Bug fix: messages previously said "smaller than input width" and
  // named the wrong module ("Stream narrower output width ...").
  require(win < wout, "Stream expander input width must be smaller than output width")
  require(wout % win == 0, "Stream expander output width must be multiple of input width")

  val io = new Bundle {
    val in = Decoupled(new StreamChannel(win)).flip
    val out = Decoupled(new StreamChannel(wout))
  }

  val n_pieces = wout / win
  val buffer = Reg(Vec(n_pieces, UInt(width = win)))
  val last = Reg(Bool())
  // collecting: filling the buffer; otherwise presenting the full word.
  val collecting = Reg(init = Bool(true))
  val (piece_idx, pkt_done) = Counter(io.in.fire(), n_pieces)

  when (io.in.fire()) { buffer(piece_idx) := io.in.bits.data }
  // On the final piece, latch its `last` flag and switch to draining.
  when (pkt_done) { last := io.in.bits.last; collecting := Bool(false) }
  when (io.out.fire()) { collecting := Bool(true) }

  io.in.ready := collecting
  io.out.valid := !collecting
  io.out.bits.data := buffer.asUInt
  io.out.bits.last := last
}
|
||||
|
||||
/** Helpers for wiring StreamIO endpoints together. */
object StreamUtils {
  /** Cross-connects two stream endpoints so that each side's input is
    * driven by the other side's output. */
  def connectStreams(a: StreamIO, b: StreamIO) {
    b.in <> a.out
    a.in <> b.out
  }
}
|
||||
|
||||
/** Mixin for hardware data types that report their serialized width in bits.
  * NOTE(review): this name shadows `scala.Serializable` inside this package;
  * renaming would change the public interface, so it is left as-is. */
trait Serializable {
  def nbits: Int
}
|
||||
|
||||
/** Converts whole `typ` values into a stream of `w`-bit words using a
  * StreamNarrower. Each input value is sent as one packet (`last` held high
  * on the narrower's input). */
class Serializer[T <: Data with Serializable](w: Int, typ: T) extends Module {
  val io = new Bundle {
    val in = Decoupled(typ).flip
    val out = Decoupled(Bits(width = w))
  }

  val narrower = Module(new StreamNarrower(typ.nbits, w))
  // Flatten the structured input into raw bits for the narrower.
  narrower.io.in.bits.data := io.in.bits.asUInt
  narrower.io.in.bits.last := Bool(true)
  narrower.io.in.valid := io.in.valid
  io.in.ready := narrower.io.in.ready
  io.out.valid := narrower.io.out.valid
  io.out.bits := narrower.io.out.bits.data
  narrower.io.out.ready := io.out.ready
}
|
||||
|
||||
/** Reassembles a stream of `w`-bit words back into whole `typ` values using
  * a StreamExpander; the inverse of [[Serializer]]. */
class Deserializer[T <: Data with Serializable](w: Int, typ: T) extends Module {
  val io = new Bundle {
    val in = Decoupled(Bits(width = w)).flip
    val out = Decoupled(typ)
  }

  val expander = Module(new StreamExpander(w, typ.nbits))
  expander.io.in.valid := io.in.valid
  expander.io.in.bits.data := io.in.bits
  expander.io.in.bits.last := Bool(true)
  io.in.ready := expander.io.in.ready
  io.out.valid := expander.io.out.valid
  // Reinterpret the collected raw bits as the structured output type.
  io.out.bits := typ.cloneType.fromBits(expander.io.out.bits.data)
  expander.io.out.ready := io.out.ready
}
|
||||
163
src/main/scala/junctions/unittests/MiscNastiTests.scala
Normal file
163
src/main/scala/junctions/unittests/MiscNastiTests.scala
Normal file
@@ -0,0 +1,163 @@
|
||||
package junctions.unittests
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import junctions.NastiConstants._
|
||||
import cde.Parameters
|
||||
|
||||
/** Drives `nBursts` write-then-read-back bursts of `burstLen` beats each
  * over a NASTI (AXI4-like) port and checks the data read back. Raises
  * `finished` once all bursts have been verified. */
class NastiDriver(dataWidth: Int, burstLen: Int, nBursts: Int)
    (implicit p: Parameters) extends NastiModule {
  val io = new Bundle {
    val nasti = new NastiIO
    val finished = Bool(OUTPUT)
    val start = Bool(INPUT)
  }

  val dataBytes = dataWidth / 8
  val nastiDataBytes = nastiXDataBits / 8

  val (write_cnt, write_done) = Counter(io.nasti.w.fire(), burstLen)
  val (read_cnt, read_done) = Counter(io.nasti.r.fire(), burstLen)
  val (req_cnt, reqs_done) = Counter(read_done, nBursts)

  // Each burst targets a distinct, burst-aligned address.
  val req_addr = Cat(req_cnt, UInt(0, log2Up(burstLen * dataBytes)))

  // Data pattern encodes burst number and beat number so reads self-check.
  val write_data = UInt(0x10000000L, dataWidth) | Cat(req_cnt, write_cnt)
  val expected_data = UInt(0x10000000L, dataWidth) | Cat(req_cnt, read_cnt)

  val (s_idle :: s_write_addr :: s_write_data :: s_write_stall :: s_write_resp ::
    s_read_addr :: s_read_data :: s_read_stall :: s_done :: Nil) = Enum(Bits(), 9)
  val state = Reg(init = s_idle)

  // Inserts a 2-cycle gap between read beats to exercise backpressure.
  val (stall_cnt, stall_done) = Counter(state === s_read_stall, 2)

  io.nasti.aw.valid := (state === s_write_addr)
  io.nasti.aw.bits := NastiWriteAddressChannel(
    id = UInt(0),
    addr = req_addr,
    size = UInt(log2Up(dataBytes)),
    len = UInt(burstLen - 1))

  io.nasti.w.valid := (state === s_write_data)
  io.nasti.w.bits := NastiWriteDataChannel(
    data = Cat(write_data, write_data),
    last = (write_cnt === UInt(burstLen - 1)))

  io.nasti.b.ready := (state === s_write_resp)

  io.nasti.ar.valid := (state === s_read_addr)
  io.nasti.ar.bits := NastiReadAddressChannel(
    id = UInt(0),
    addr = req_addr,
    size = UInt(log2Up(dataBytes)),
    len = UInt(burstLen - 1))

  io.nasti.r.ready := (state === s_read_data)

  io.finished := (state === s_done)

  // State transitions. Later `when`s win under Chisel last-connect semantics,
  // so e.g. write_done overrides the w-fire transition on the last beat.
  when (state === s_idle && io.start) { state := s_write_addr }
  when (io.nasti.aw.fire()) { state := s_write_data }
  when (io.nasti.w.fire()) { state := s_write_stall }
  when (state === s_write_stall) { state := s_write_data }
  when (write_done) { state := s_write_resp }
  when (io.nasti.b.fire()) { state := s_read_addr }
  when (io.nasti.ar.fire()) { state := s_read_data }
  when (io.nasti.r.fire()) { state := s_read_stall }
  when (stall_done) { state := s_read_data }
  when (read_done) { state := s_write_addr }
  when (reqs_done) { state := s_done }

  // Extract this driver's lane from the (possibly wider) NASTI data bus.
  val full_addr = req_addr + (read_cnt << UInt(log2Up(dataBytes)))
  val byteshift = full_addr(log2Up(nastiDataBytes) - 1, 0)
  val bitshift = Cat(byteshift, UInt(0, 3))
  val read_data = (io.nasti.r.bits.data >> bitshift) & Fill(dataWidth, UInt(1, 1))

  assert(!io.nasti.r.valid || read_data === expected_data,
    s"NastiDriver got wrong data")
}
|
||||
|
||||
|
||||
/** Simple NASTI slave for the Atos converter test: accepts one 4-beat write
  * burst, stores the data, then serves one 4-beat read burst returning it.
  * `finished` rises after the read completes. */
class AtosConverterTestBackend(implicit p: Parameters) extends NastiModule()(p) {
  val io = new Bundle {
    val nasti = (new NastiIO).flip
    val finished = Bool(OUTPUT)
  }

  val (s_waddr :: s_wdata :: s_wresp ::
    s_raddr :: s_rresp :: s_done :: Nil) = Enum(Bits(), 6)
  val state = Reg(init = s_waddr)

  val n_words = 4
  val test_data = Reg(Vec(n_words, UInt(width = nastiXDataBits)))
  // Transaction ID latched from AW/AR and echoed back on B/R responses.
  val req_id = Reg(UInt(width = nastiXIdBits))

  val (w_count, w_last) = Counter(io.nasti.w.fire(), n_words)
  val (r_count, r_last) = Counter(io.nasti.r.fire(), n_words)

  when (io.nasti.aw.fire()) {
    req_id := io.nasti.aw.bits.id
    state := s_wdata
  }
  when (io.nasti.w.fire()) {
    test_data(w_count) := io.nasti.w.bits.data
    when (io.nasti.w.bits.last) { state := s_wresp }
  }
  when (io.nasti.b.fire()) { state := s_raddr }
  when (io.nasti.ar.fire()) {
    req_id := io.nasti.ar.bits.id
    state := s_rresp
  }
  when (io.nasti.r.fire() && io.nasti.r.bits.last) { state := s_done }

  io.nasti.aw.ready := (state === s_waddr)
  io.nasti.w.ready := (state === s_wdata)
  io.nasti.ar.ready := (state === s_raddr)

  io.nasti.b.valid := (state === s_wresp)
  io.nasti.b.bits := NastiWriteResponseChannel(id = req_id)

  io.nasti.r.valid := (state === s_rresp)
  io.nasti.r.bits := NastiReadDataChannel(
    id = req_id,
    data = test_data(r_count),
    last = r_last)

  io.finished := (state === s_done)
}
|
||||
|
||||
/** Round-trips NASTI traffic through the Atos serialization path:
  * driver -> client converter -> 8-bit serdes link -> manager converter
  * -> backend. Finishes when both driver and backend report done. */
class AtosConverterTest(implicit val p: Parameters) extends UnitTest
    with HasNastiParameters {
  val frontend = Module(new NastiDriver(nastiXDataBits, 4, 1))
  val backend = Module(new AtosConverterTestBackend)

  // Narrow 8-bit serial link between the two converters.
  val serdes = Module(new AtosSerdes(8))
  val desser = Module(new AtosDesser(8))

  val client_conv = Module(new AtosClientConverter)
  val manager_conv = Module(new AtosManagerConverter)

  client_conv.io.nasti <> frontend.io.nasti
  serdes.io.wide <> client_conv.io.atos
  desser.io.narrow <> serdes.io.narrow
  manager_conv.io.atos <> desser.io.wide
  backend.io.nasti <> manager_conv.io.nasti
  frontend.io.start := io.start

  io.finished := frontend.io.finished && backend.io.finished
}
|
||||
|
||||
/** Exercises the HASTI bus path: a NASTI driver behind a NASTI-to-HASTI
  * converter, talking to a HASTI test SRAM over a single-slave bus. */
class HastiTest(implicit p: Parameters) extends UnitTest {
  val sram = Module(new HastiTestSRAM(8))
  // Single-slave bus: every address selects slave 0.
  val bus = Module(new HastiBus(Seq(a => Bool(true))))
  val conv = Module(new HastiMasterIONastiIOConverter)
  val driver = Module(new NastiDriver(32, 8, 2))

  bus.io.slaves(0) <> sram.io
  bus.io.master <> conv.io.hasti
  conv.io.nasti <> driver.io.nasti
  io.finished := driver.io.finished
  driver.io.start := io.start
}
|
||||
|
||||
|
||||
85
src/main/scala/junctions/unittests/MultiWidthFifoTest.scala
Normal file
85
src/main/scala/junctions/unittests/MultiWidthFifoTest.scala
Normal file
@@ -0,0 +1,85 @@
|
||||
package junctions.unittests
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import junctions.NastiConstants._
|
||||
|
||||
/** Tests MultiWidthFifo in both directions: a 16-to-8-bit (big-to-little)
  * instance and an 8-to-16-bit (little-to-big) instance. Each side fills its
  * FIFO, then drains it while assertions compare the output against the
  * expected re-chunking of the input pattern. */
class MultiWidthFifoTest extends UnitTest {
  val big2little = Module(new MultiWidthFifo(16, 8, 8))
  val little2big = Module(new MultiWidthFifo(8, 16, 4))

  val bl_send = Reg(init = Bool(false))
  val lb_send = Reg(init = Bool(false))
  val bl_recv = Reg(init = Bool(false))
  val lb_recv = Reg(init = Bool(false))
  val bl_finished = Reg(init = Bool(false))
  val lb_finished = Reg(init = Bool(false))

  // Distinct per-word patterns so any mis-chunking is detectable.
  val bl_data = Vec.tabulate(4){i => UInt((2 * i + 1) * 256 + 2 * i, 16)}
  val lb_data = Vec.tabulate(8){i => UInt(i, 8)}

  val (bl_send_cnt, bl_send_done) = Counter(big2little.io.in.fire(), 4)
  val (lb_send_cnt, lb_send_done) = Counter(little2big.io.in.fire(), 8)

  val (bl_recv_cnt, bl_recv_done) = Counter(big2little.io.out.fire(), 8)
  val (lb_recv_cnt, lb_recv_done) = Counter(little2big.io.out.fire(), 4)

  big2little.io.in.valid := bl_send
  big2little.io.in.bits := bl_data(bl_send_cnt)
  big2little.io.out.ready := bl_recv

  little2big.io.in.valid := lb_send
  little2big.io.in.bits := lb_data(lb_send_cnt)
  little2big.io.out.ready := lb_recv

  // Expected big-to-little output: low byte first, then high byte.
  val bl_recv_data_idx = bl_recv_cnt >> UInt(1)
  val bl_recv_data = Mux(bl_recv_cnt(0),
    bl_data(bl_recv_data_idx)(15, 8),
    bl_data(bl_recv_data_idx)(7, 0))

  // Expected little-to-big output: two consecutive input bytes concatenated.
  val lb_recv_data = Cat(
    lb_data(Cat(lb_recv_cnt, UInt(1, 1))),
    lb_data(Cat(lb_recv_cnt, UInt(0, 1))))

  when (io.start) {
    bl_send := Bool(true)
    lb_send := Bool(true)
  }

  when (bl_send_done) {
    bl_send := Bool(false)
    bl_recv := Bool(true)
  }

  when (lb_send_done) {
    lb_send := Bool(false)
    lb_recv := Bool(true)
  }

  when (bl_recv_done) {
    bl_recv := Bool(false)
    bl_finished := Bool(true)
  }

  when (lb_recv_done) {
    lb_recv := Bool(false)
    lb_finished := Bool(true)
  }

  io.finished := bl_finished && lb_finished

  val bl_start_recv = Reg(next = bl_send_done)
  val lb_start_recv = Reg(next = lb_send_done)

  assert(!little2big.io.out.valid || little2big.io.out.bits === lb_recv_data,
    "Little to Big data mismatch")
  // Bug fix: assertion message previously read "Bit to Little data mismatch".
  assert(!big2little.io.out.valid || big2little.io.out.bits === bl_recv_data,
    "Big to Little data mismatch")

  assert(!lb_start_recv || little2big.io.count === UInt(4),
    "Little to Big count incorrect")
  assert(!bl_start_recv || big2little.io.count === UInt(8),
    "Big to Little count incorrect")
}
|
||||
|
||||
|
||||
111
src/main/scala/junctions/unittests/NastiDemuxTest.scala
Normal file
111
src/main/scala/junctions/unittests/NastiDemuxTest.scala
Normal file
@@ -0,0 +1,111 @@
|
||||
package junctions.unittests
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import junctions.NastiConstants._
|
||||
import cde.Parameters
|
||||
|
||||
/** Master-side driver for the NASTI demux test: for each of `n` slaves it
  * writes the current select value to address 0 and reads it back, then
  * advances `select`. `finished` rises after the last read checks out. */
class NastiDemuxDriver(n: Int)(implicit p: Parameters) extends Module {
  val io = new Bundle {
    val start = Bool(INPUT)
    val finished = Bool(OUTPUT)
    val nasti = new NastiIO
    val select = UInt(OUTPUT, log2Up(n))
  }

  val (s_idle :: s_write_addr :: s_write_data :: s_write_resp ::
    s_read_addr :: s_read_resp :: s_done :: Nil) = Enum(Bits(), 7)
  val state = Reg(init = s_idle)

  val select = Reg(init = UInt(0, log2Up(n)))

  when (state === s_idle && io.start) { state := s_write_addr }
  when (io.nasti.aw.fire()) { state := s_write_data }
  when (io.nasti.w.fire()) { state := s_write_resp }
  when (io.nasti.b.fire()) { state := s_read_addr }
  when (io.nasti.ar.fire()) { state := s_read_resp }
  when (io.nasti.r.fire()) {
    when (select === UInt(n - 1)) {
      state := s_done
    } .otherwise {
      // Move on to the next slave and repeat the write/read pair.
      select := select + UInt(1)
      state := s_write_addr
    }
  }

  io.nasti.aw.valid := (state === s_write_addr)
  io.nasti.aw.bits := NastiWriteAddressChannel(
    id = UInt(0),
    addr = UInt(0),
    size = UInt("b011")) // AXI size encoding 0b011 = 8 bytes per beat
  io.nasti.w.valid := (state === s_write_data)
  io.nasti.w.bits := NastiWriteDataChannel(data = select)
  io.nasti.b.ready := (state === s_write_resp)
  io.nasti.ar.valid := (state === s_read_addr)
  io.nasti.ar.bits := NastiReadAddressChannel(
    id = UInt(0),
    addr = UInt(0),
    size = UInt("b011"))
  io.nasti.r.ready := (state === s_read_resp)

  io.finished := (state === s_done)
  io.select := select

  assert(!io.nasti.r.valid || io.nasti.r.bits.data === select,
    "NASTI DeMux test: Read data did not match")
}
|
||||
|
||||
/** Single-shot NASTI slave: accepts exactly one write (storing the data and
  * transaction ID), responds, then serves exactly one read returning the
  * stored value, after which it parks in `s_done`. */
class NastiDemuxSlave(implicit p: Parameters) extends NastiModule()(p) {
  val io = (new NastiIO).flip

  val (s_write_wait :: s_write_data :: s_write_resp ::
    s_read_wait :: s_read_resp :: s_done :: Nil) = Enum(Bits(), 6)
  val state = Reg(init = s_write_wait)

  val value = Reg(UInt(width = 64))
  // ID latched from AW/AR so B/R responses echo the requester's ID.
  val id = Reg(UInt(width = nastiXIdBits))

  when (io.aw.fire()) {
    id := io.aw.bits.id
    state := s_write_data
  }

  when (io.w.fire()) {
    value := io.w.bits.data
    state := s_write_resp
  }

  when (io.b.fire()) { state := s_read_wait }

  when (io.ar.fire()) {
    id := io.ar.bits.id
    state := s_read_resp
  }

  when (io.r.fire()) { state := s_done }

  io.aw.ready := (state === s_write_wait)
  io.w.ready := (state === s_write_data)
  io.b.valid := (state === s_write_resp)
  io.b.bits := NastiWriteResponseChannel(id = id)
  io.ar.ready := (state === s_read_wait)
  io.r.valid := (state === s_read_resp)
  io.r.bits := NastiReadDataChannel(id = id, data = value)
}
|
||||
|
||||
/** Connects one NastiDemuxDriver through a NastiMemoryDemux to four
  * single-shot slaves; the driver write/read-checks each slave in turn. */
class NastiMemoryDemuxTest(implicit p: Parameters) extends UnitTest {
  val nSlaves = 4

  val driver = Module(new NastiDemuxDriver(nSlaves))
  driver.io.start := io.start
  io.finished := driver.io.finished

  val demux = Module(new NastiMemoryDemux(nSlaves))
  demux.io.master <> driver.io.nasti
  // The driver itself steers the demux as it steps through the slaves.
  demux.io.select := driver.io.select

  for (i <- 0 until nSlaves) {
    val slave = Module(new NastiDemuxSlave)
    slave.io <> demux.io.slaves(i)
  }
}
|
||||
65
src/main/scala/junctions/unittests/UnitTest.scala
Normal file
65
src/main/scala/junctions/unittests/UnitTest.scala
Normal file
@@ -0,0 +1,65 @@
|
||||
package junctions.unittests
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import cde.{Field, Parameters}
|
||||
|
||||
/** Base class for hardware unit tests: the harness pulses `start` and waits
  * for the test to raise `finished`. */
abstract class UnitTest extends Module {
  val io = new Bundle {
    val finished = Bool(OUTPUT)
    val start = Bool(INPUT)
  }

  // Log test start to aid simulation debugging.
  when (io.start) {
    printf(s"Started UnitTest ${this.getClass.getSimpleName}\n")
  }
}
|
||||
|
||||
/** Config field: a function producing the sequence of unit tests to run. */
case object UnitTests extends Field[Parameters => Seq[UnitTest]]
|
||||
|
||||
/** Runs the unit tests from p(UnitTests) one after another, starting each
  * and waiting for its `finished` signal; `io.finished` rises once the last
  * test completes. A shared watchdog Timer aborts hung tests. */
class UnitTestSuite(implicit p: Parameters) extends Module {
  val io = new Bundle {
    val finished = Bool(OUTPUT)
  }

  val tests = p(UnitTests)(p)

  val s_idle :: s_start :: s_wait :: s_done :: Nil = Enum(Bits(), 4)
  val state = Reg(init = s_idle)
  val test_idx = Reg(init = UInt(0, log2Up(tests.size)))
  val test_finished = Vec(tests.map(_.io.finished))

  when (state === s_idle) { state := s_start }
  when (state === s_start) { state := s_wait }
  when (state === s_wait && test_finished(test_idx)) {
    test_idx := test_idx + UInt(1)
    // Cleanup: removed a dead `state := s_start` assignment that was
    // immediately overridden by this connection (last-connect semantics).
    state := Mux(test_idx === UInt(tests.size - 1), s_done, s_start)
  }

  // 1000-cycle watchdog per test; default-drive the valids, then override
  // them for the currently-selected test below.
  val timer = Module(new Timer(1000, tests.size))
  timer.io.start.valid := Bool(false)
  timer.io.stop.valid := Bool(false)

  tests.zipWithIndex.foreach { case (mod, i) =>
    mod.io.start := (state === s_start) && test_idx === UInt(i)
    when (test_idx === UInt(i)) {
      timer.io.start.valid := mod.io.start
      timer.io.start.bits := UInt(i)
      timer.io.stop.valid := mod.io.finished
      timer.io.stop.bits := UInt(i)
    }
  }
  io.finished := (state === s_done)

  assert(!timer.io.timeout.valid, "UnitTest timed out")
}
|
||||
|
||||
/** The default collection of junctions unit tests. */
object JunctionsUnitTests {
  def apply(implicit p: Parameters): Seq[UnitTest] =
    Seq(
      Module(new MultiWidthFifoTest),
      Module(new AtosConverterTest),
      Module(new NastiMemoryDemuxTest),
      Module(new HastiTest))
}
|
||||
365
src/main/scala/junctions/util.scala
Normal file
365
src/main/scala/junctions/util.scala
Normal file
@@ -0,0 +1,365 @@
|
||||
// See LICENSE for license details.
|
||||
package junctions
|
||||
import Chisel._
|
||||
import cde.Parameters
|
||||
|
||||
/** Bundle base class whose `cloneType` works for subclasses whose only
  * constructor argument is the implicit `Parameters` object. Subclasses
  * with additional constructor arguments must override `cloneType`. */
class ParameterizedBundle(implicit p: Parameters) extends Bundle {
  override def cloneType = {
    try {
      // Re-invoke the (single-argument) constructor with the same Parameters.
      this.getClass.getConstructors.head.newInstance(p).asInstanceOf[this.type]
    } catch {
      case e: java.lang.IllegalArgumentException =>
        // Bug fix: error message previously misspelled "ParamaterizedBundle".
        throwException("Unable to use ParameterizedBundle.cloneType on " +
          this.getClass + ", probably because " + this.getClass +
          "() takes more than one argument. Consider overriding " +
          "cloneType() on " + this.getClass, e)
    }
  }
}
|
||||
|
||||
/** Flow-through queue backed by a SeqMem (synchronous-read memory). When
  * empty, enqueued data "flows" combinationally to the dequeue port; the
  * read address is computed a cycle early to hide the SeqMem read latency. */
class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Module {
  val io = new QueueIO(data, entries)
  require(entries > 1)

  // do_flow: a beat bypasses the RAM entirely (empty queue, consumer ready).
  val do_flow = Wire(Bool())
  val do_enq = io.enq.fire() && !do_flow
  val do_deq = io.deq.fire() && !do_flow

  // Classic circular-buffer full/empty disambiguation via maybe_full.
  val maybe_full = Reg(init=Bool(false))
  val enq_ptr = Counter(do_enq, entries)._1
  val (deq_ptr, deq_done) = Counter(do_deq, entries)
  when (do_enq =/= do_deq) { maybe_full := do_enq }

  val ptr_match = enq_ptr === deq_ptr
  val empty = ptr_match && !maybe_full
  val full = ptr_match && maybe_full
  val atLeastTwo = full || enq_ptr - deq_ptr >= UInt(2)
  do_flow := empty && io.deq.ready

  val ram = SeqMem(entries, data)
  when (do_enq) { ram.write(enq_ptr, io.enq.bits) }

  // Issue a read when the consumer can take data and there is (or will be)
  // an entry beyond the one currently presented.
  val ren = io.deq.ready && (atLeastTwo || !io.deq.valid && !empty)
  // Read one slot ahead when the current output is valid (wrapping at the
  // end of the buffer); otherwise read the head directly.
  val raddr = Mux(io.deq.valid, Mux(deq_done, UInt(0), deq_ptr + UInt(1)), deq_ptr)
  val ram_out_valid = Reg(next = ren)

  io.deq.valid := Mux(empty, io.enq.valid, ram_out_valid)
  io.enq.ready := !full
  io.deq.bits := Mux(empty, io.enq.bits, ram.read(raddr, ren))
}
|
||||
|
||||
/** A HellaFlowQueue followed by a single-entry pipe queue, giving the
  * capacity of the flow queue with a registered output. */
class HellaQueue[T <: Data](val entries: Int)(data: => T) extends Module {
  val io = new QueueIO(data, entries)

  val fq = Module(new HellaFlowQueue(entries)(data))
  fq.io.enq <> io.enq
  io.deq <> Queue(fq.io.deq, 1, pipe = true)
}
|
||||
|
||||
object HellaQueue {
  /** Buffers `enq` through a HellaQueue of `entries` slots and returns the
    * dequeue side. */
  def apply[T <: Data](enq: DecoupledIO[T], entries: Int) = {
    val q = Module((new HellaQueue(entries)) { enq.bits })
    q.io.enq.valid := enq.valid // not using <> so that override is allowed
    q.io.enq.bits := enq.bits
    enq.ready := q.io.enq.ready
    q.io.deq
  }
}
|
||||
|
||||
/** A generalized locking round-robin arbiter that addresses the limitations
  * of the version in the Chisel standard library. Subclasses control when
  * the grant is held (via `locked`/`lockIdx`) and when it is released. */
abstract class JunctionsAbstractLockingArbiter[T <: Data](typ: T, arbN: Int)
    extends Module {

  val io = new Bundle {
    val in = Vec(arbN, Decoupled(typ.cloneType)).flip
    val out = Decoupled(typ.cloneType)
  }

  /** Rotates `norm` left by `rot` positions with wrap-around. */
  def rotateLeft[T <: Data](norm: Vec[T], rot: UInt): Vec[T] = {
    val n = norm.size
    Vec.tabulate(n) { i =>
      Mux(rot < UInt(n - i), norm(UInt(i) + rot), norm(rot - UInt(n - i)))
    }
  }

  val lockIdx = Reg(init = UInt(0, log2Up(arbN)))
  val locked = Reg(init = Bool(false))

  // Round-robin: search for a valid requester starting just after lockIdx.
  val choice = PriorityMux(
    rotateLeft(Vec(io.in.map(_.valid)), lockIdx + UInt(1)),
    rotateLeft(Vec((0 until arbN).map(UInt(_))), lockIdx + UInt(1)))

  // While locked, the grant sticks to the locked requester.
  val chosen = Mux(locked, lockIdx, choice)

  for (i <- 0 until arbN) {
    io.in(i).ready := io.out.ready && chosen === UInt(i)
  }

  io.out.valid := io.in(chosen).valid
  io.out.bits := io.in(chosen).bits
}
|
||||
|
||||
/** This locking arbiter determines when it is safe to unlock by peeking at
  * the data: it locks on a granted beat that `realNeedsLock` approves and
  * releases as soon as `canUnlock` holds for a granted beat. */
class JunctionsPeekingArbiter[T <: Data](
    typ: T, arbN: Int,
    canUnlock: T => Bool,
    needsLock: Option[T => Bool] = None)
    extends JunctionsAbstractLockingArbiter(typ, arbN) {

  // Default when needsLock is absent: every transaction takes the lock.
  def realNeedsLock(data: T): Bool =
    needsLock.map(_(data)).getOrElse(Bool(true))

  when (io.out.fire()) {
    when (!locked && realNeedsLock(io.out.bits)) {
      lockIdx := choice
      locked := Bool(true)
    }
    // The unlock statement takes precedence (last-connect wins), so a beat
    // that both needs the lock and can unlock leaves the arbiter unlocked.
    when (canUnlock(io.out.bits)) {
      locked := Bool(false)
    }
  }
}
|
||||
|
||||
/** This arbiter determines when it is safe to unlock by counting
  * transactions: the lock is released after `count` granted beats. */
class JunctionsCountingArbiter[T <: Data](
    typ: T, arbN: Int, count: Int,
    val needsLock: Option[T => Bool] = None)
    extends JunctionsAbstractLockingArbiter(typ, arbN) {

  // Default when needsLock is absent: every transaction takes the lock.
  def realNeedsLock(data: T): Bool =
    needsLock.map(_(data)).getOrElse(Bool(true))

  // if count is 1, you should use a non-locking arbiter
  require(count > 1, "CountingArbiter cannot have count <= 1")

  val lock_ctr = Counter(count)

  when (io.out.fire()) {
    when (!locked && realNeedsLock(io.out.bits)) {
      lockIdx := choice
      locked := Bool(true)
      // First beat of the locked burst also counts toward `count`.
      lock_ctr.inc()
    }

    when (locked) {
      // inc() returns true on wrap, i.e. after the final beat.
      when (lock_ctr.inc()) { locked := Bool(false) }
    }
  }
}
|
||||
|
||||
/** Payload for a ReorderQueue enqueue: the data plus its routing tag. */
class ReorderQueueWrite[T <: Data](dType: T, tagWidth: Int) extends Bundle {
  val data = dType.cloneType
  val tag = UInt(width = tagWidth)

  // Explicit cloneType needed because of the two constructor arguments.
  override def cloneType =
    new ReorderQueueWrite(dType, tagWidth).asInstanceOf[this.type]
}
|
||||
|
||||
/** Decoupled enqueue interface carrying a ReorderQueueWrite payload. */
class ReorderEnqueueIO[T <: Data](dType: T, tagWidth: Int)
    extends DecoupledIO(new ReorderQueueWrite(dType, tagWidth)) {

  // Explicit cloneType needed because of the two constructor arguments.
  override def cloneType =
    new ReorderEnqueueIO(dType, tagWidth).asInstanceOf[this.type]
}
|
||||
|
||||
/** Dequeue interface for ReorderQueue: the consumer presents a `tag` and a
  * `valid` pulse to free the entry; `data` and `matches` are combinational
  * lookups for that tag. */
class ReorderDequeueIO[T <: Data](dType: T, tagWidth: Int) extends Bundle {
  val valid = Bool(INPUT)
  val tag = UInt(INPUT, tagWidth)
  val data = dType.cloneType.asOutput
  val matches = Bool(OUTPUT)

  // Explicit cloneType needed because of the two constructor arguments.
  override def cloneType =
    new ReorderDequeueIO(dType, tagWidth).asInstanceOf[this.type]
}
|
||||
|
||||
/** Tag-indexed storage used to reorder responses. Two implementations are
  * chosen at elaboration time: a CAM (when the tag space is larger than the
  * requested capacity) or a direct-mapped array (one slot per tag value). */
class ReorderQueue[T <: Data](dType: T, tagWidth: Int, size: Option[Int] = None)
    extends Module {
  val io = new Bundle {
    val enq = new ReorderEnqueueIO(dType, tagWidth).flip
    val deq = new ReorderDequeueIO(dType, tagWidth)
  }

  val tagSpaceSize = 1 << tagWidth
  val actualSize = size.getOrElse(tagSpaceSize)

  if (tagSpaceSize > actualSize) {
    // CAM variant: fewer slots than tag values, so tags are stored and
    // matched associatively.
    val roq_data = Reg(Vec(actualSize, dType))
    val roq_tags = Reg(Vec(actualSize, UInt(width = tagWidth)))
    val roq_free = Reg(init = Vec.fill(actualSize)(Bool(true)))

    val roq_enq_addr = PriorityEncoder(roq_free)
    val roq_matches = roq_tags.zip(roq_free)
      .map { case (tag, free) => tag === io.deq.tag && !free }
    val roq_deq_onehot = PriorityEncoderOH(roq_matches)

    io.enq.ready := roq_free.reduce(_ || _)
    io.deq.data := Mux1H(roq_deq_onehot, roq_data)
    io.deq.matches := roq_matches.reduce(_ || _)

    when (io.enq.valid && io.enq.ready) {
      roq_data(roq_enq_addr) := io.enq.bits.data
      roq_tags(roq_enq_addr) := io.enq.bits.tag
      roq_free(roq_enq_addr) := Bool(false)
    }

    when (io.deq.valid) {
      roq_free(OHToUInt(roq_deq_onehot)) := Bool(true)
    }

    // CAMs are expensive; warn at elaboration so sizing can be revisited.
    println(s"Warning - using a CAM for ReorderQueue, tagBits: ${tagWidth} size: ${actualSize}")
  } else {
    // Direct-mapped variant: the tag itself is the storage index.
    val roq_data = Mem(tagSpaceSize, dType)
    val roq_free = Reg(init = Vec.fill(tagSpaceSize)(Bool(true)))

    io.enq.ready := roq_free(io.enq.bits.tag)
    io.deq.data := roq_data(io.deq.tag)
    io.deq.matches := !roq_free(io.deq.tag)

    when (io.enq.valid && io.enq.ready) {
      roq_data(io.enq.bits.tag) := io.enq.bits.data
      roq_free(io.enq.bits.tag) := Bool(false)
    }

    when (io.deq.valid) {
      roq_free(io.deq.tag) := Bool(true)
    }
  }
}
|
||||
|
||||
/** Factory for [[DecoupledHelper]] from a varargs list of ready/valid signals. */
object DecoupledHelper {
  def apply(rvs: Bool*) = new DecoupledHelper(rvs)
}
|
||||
|
||||
/** Tracks a set of ready/valid signals that must all be true for a
  * multi-party transaction to proceed. */
class DecoupledHelper(val rvs: Seq[Bool]) {
  /** ANDs every tracked signal except `exclude` (compared by reference),
    * together with any extra `includes` — the standard pattern for driving
    * one party's ready/valid from everyone else's. */
  def fire(exclude: Bool, includes: Bool*) = {
    val others = rvs.filterNot(_ eq exclude)
    (others ++ includes).reduce(_ && _)
  }
}
|
||||
|
||||
/** FIFO whose input and output widths may differ. `n` is the capacity in
  * output words. Three elaboration-time cases: equal widths (plain Queue),
  * wide-in/narrow-out (split each input word), narrow-in/wide-out
  * (accumulate input words). `count` is reported in output words. */
class MultiWidthFifo(inW: Int, outW: Int, n: Int) extends Module {
  val io = new Bundle {
    val in = Decoupled(Bits(width = inW)).flip
    val out = Decoupled(Bits(width = outW))
    val count = UInt(OUTPUT, log2Up(n + 1))
  }

  if (inW == outW) {
    // Same width: degenerate to a standard queue.
    val q = Module(new Queue(Bits(width = inW), n))
    q.io.enq <> io.in
    io.out <> q.io.deq
    io.count := q.io.count
  } else if (inW > outW) {
    // Wide in, narrow out: each input word yields nBeats output words.
    val nBeats = inW / outW

    require(inW % outW == 0, s"MultiWidthFifo: in: $inW not divisible by out: $outW")
    require(n % nBeats == 0, s"Cannot store $n output words when output beats is $nBeats")

    // Storage is in input words; rdata is the same storage viewed as
    // output-width slices.
    val wdata = Reg(Vec(n / nBeats, Bits(width = inW)))
    val rdata = Vec(wdata.flatMap { indat =>
      (0 until nBeats).map(i => indat(outW * (i + 1) - 1, outW * i)) })

    val head = Reg(init = UInt(0, log2Up(n / nBeats)))
    val tail = Reg(init = UInt(0, log2Up(n)))
    // size counts output words currently held.
    val size = Reg(init = UInt(0, log2Up(n + 1)))

    when (io.in.fire()) {
      wdata(head) := io.in.bits
      head := head + UInt(1)
    }

    when (io.out.fire()) { tail := tail + UInt(1) }

    // One enqueue adds nBeats output words; one dequeue removes one.
    size := MuxCase(size, Seq(
      (io.in.fire() && io.out.fire()) -> (size + UInt(nBeats - 1)),
      io.in.fire() -> (size + UInt(nBeats)),
      io.out.fire() -> (size - UInt(1))))

    io.out.valid := size > UInt(0)
    io.out.bits := rdata(tail)
    io.in.ready := size < UInt(n)
    io.count := size
  } else {
    // Narrow in, wide out: nBeats input words form one output word.
    val nBeats = outW / inW

    require(outW % inW == 0, s"MultiWidthFifo: out: $outW not divisible by in: $inW")

    // Storage is in input words; rdata concatenates nBeats of them
    // (first-received word in the least-significant position).
    val wdata = Reg(Vec(n * nBeats, Bits(width = inW)))
    val rdata = Vec.tabulate(n) { i =>
      Cat(wdata.slice(i * nBeats, (i + 1) * nBeats).reverse)}

    val head = Reg(init = UInt(0, log2Up(n * nBeats)))
    val tail = Reg(init = UInt(0, log2Up(n)))
    // size counts input words currently held.
    val size = Reg(init = UInt(0, log2Up(n * nBeats + 1)))

    when (io.in.fire()) {
      wdata(head) := io.in.bits
      head := head + UInt(1)
    }

    when (io.out.fire()) { tail := tail + UInt(1) }

    size := MuxCase(size, Seq(
      (io.in.fire() && io.out.fire()) -> (size - UInt(nBeats - 1)),
      io.in.fire() -> (size + UInt(1)),
      io.out.fire() -> (size - UInt(nBeats))))

    // A full output word exists only once nBeats input words are present.
    io.count := size >> UInt(log2Up(nBeats))
    io.out.valid := io.count > UInt(0)
    io.out.bits := rdata(tail)
    io.in.ready := size < UInt(n * nBeats)
  }
}
|
||||
|
||||
// ============
|
||||
// Static timer
|
||||
// ============
|
||||
|
||||
// Timer with a statically-specified period.
// Can take multiple inflight start-stop events with ID.
// Will continue to count down as long as at least one event is inflight.
/** Watchdog timer: tracks up to `maxInflight` outstanding events by ID.
  * The countdown restarts on every `start` and runs while any event is
  * inflight; `timeout` fires with one inflight ID when it reaches zero. */
class Timer(initCount: Int, maxInflight: Int) extends Module {
  val io = new Bundle {
    val start = Valid(UInt(width = log2Up(maxInflight))).flip
    val stop = Valid(UInt(width = log2Up(maxInflight))).flip
    val timeout = Valid(UInt(width = log2Up(maxInflight)))
  }

  val inflight = Reg(init = Vec.fill(maxInflight) { Bool(false) })
  val countdown = Reg(UInt(width = log2Up(initCount)))
  val active = inflight.reduce(_ || _)

  when (active) {
    countdown := countdown - UInt(1)
  }

  // A new start event (re)arms the countdown (overrides the decrement above
  // in the same cycle, by last-connect semantics).
  when (io.start.valid) {
    inflight(io.start.bits) := Bool(true)
    countdown := UInt(initCount - 1)
  }
  when (io.stop.valid) {
    inflight(io.stop.bits) := Bool(false)
  }

  io.timeout.valid := countdown === UInt(0) && active
  io.timeout.bits := PriorityEncoder(inflight)

  assert(!io.stop.valid || inflight(io.stop.bits),
    "Timer stop for transaction that's not inflight")
}
|
||||
|
||||
object Timer {
  /** Convenience wrapper for a single-event timer: returns the timeout
    * signal for one start/stop pulse pair. */
  def apply(initCount: Int, start: Bool, stop: Bool): Bool = {
    val timer = Module(new Timer(initCount, 1))
    timer.io.start.valid := start
    timer.io.start.bits := UInt(0)
    timer.io.stop.valid := stop
    timer.io.stop.bits := UInt(0)
    timer.io.timeout.valid
  }
}
|
||||
|
||||
|
||||
113
src/main/scala/rocket/arbiter.scala
Normal file
113
src/main/scala/rocket/arbiter.scala
Normal file
@@ -0,0 +1,113 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import cde.{Parameters, Field}
|
||||
import junctions.{ParameterizedBundle, DecoupledHelper}
|
||||
|
||||
/** Arbitrates `n` requestors onto one HellaCacheIO port. Priority is fixed:
  * requestor 0 is highest. Responses are routed back by the requestor index
  * packed into the low bits of the request tag. */
class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
{
  val io = new Bundle {
    val requestor = Vec(n, new HellaCacheIO).flip
    val mem = new HellaCacheIO
  }

  if (n == 1) {
    // Single requestor: pass straight through, no tag manipulation.
    io.mem <> io.requestor.head
  } else {
    // s1_id / s2_id track which requestor owns the pipeline stages.
    val s1_id = Reg(UInt())
    val s2_id = Reg(next=s1_id)

    io.mem.invalidate_lr := io.requestor.map(_.invalidate_lr).reduce(_||_)
    io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_)
    // Fixed priority: requestor i is ready only if all lower-indexed
    // requestors are idle.
    io.requestor(0).req.ready := io.mem.req.ready
    for (i <- 1 until n)
      io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid

    // Iterate from lowest to highest priority so higher-priority connections
    // are made last and win (last-connect semantics).
    for (i <- n-1 to 0 by -1) {
      val req = io.requestor(i).req
      def connect_s0() = {
        io.mem.req.bits.cmd := req.bits.cmd
        io.mem.req.bits.typ := req.bits.typ
        io.mem.req.bits.addr := req.bits.addr
        io.mem.req.bits.phys := req.bits.phys
        // Pack the requestor index into the tag's low bits for routing.
        io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n)))
        s1_id := UInt(i)
      }
      def connect_s1() = {
        io.mem.s1_kill := io.requestor(i).s1_kill
        io.mem.s1_data := io.requestor(i).s1_data
      }

      if (i == n-1) {
        // Lowest-priority requestor provides the defaults.
        connect_s0()
        connect_s1()
      } else {
        when (req.valid) { connect_s0() }
        when (s1_id === UInt(i)) { connect_s1() }
      }
    }

    for (i <- 0 until n) {
      val resp = io.requestor(i).resp
      // Route the response using the requestor index in the tag's low bits.
      val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i)
      resp.valid := io.mem.resp.valid && tag_hit
      io.requestor(i).xcpt := io.mem.xcpt
      io.requestor(i).ordered := io.mem.ordered
      io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i)
      resp.bits := io.mem.resp.bits
      // Strip the routing bits before handing the tag back.
      resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)

      io.requestor(i).replay_next := io.mem.replay_next
    }
  }
}
|
||||
|
||||
/** Round-robin arbiter over request/response channel pairs that returns
  * responses in request order: a small queue records which input port won
  * each grant, and each response is steered back to the recorded port.
  */
class InOrderArbiter[T <: Data, U <: Data](reqTyp: T, respTyp: U, n: Int)
    (implicit p: Parameters) extends Module {
  val io = new Bundle {
    val in_req = Vec(n, Decoupled(reqTyp)).flip
    val in_resp = Vec(n, Decoupled(respTyp))
    val out_req = Decoupled(reqTyp)
    val out_resp = Decoupled(respTyp).flip
  }

  if (n > 1) {
    // Remembers the winner of each granted request, in issue order.
    val route_q = Module(new Queue(UInt(width = log2Up(n)), 2))
    val req_arb = Module(new RRArbiter(reqTyp, n))
    req_arb.io.in <> io.in_req

    // A request fires only when all three parties are ready: the arbiter
    // has a winner, the routing queue has space, and downstream is ready.
    val req_helper = DecoupledHelper(
      req_arb.io.out.valid,
      route_q.io.enq.ready,
      io.out_req.ready)

    io.out_req.bits := req_arb.io.out.bits
    io.out_req.valid := req_helper.fire(io.out_req.ready)

    route_q.io.enq.bits := req_arb.io.chosen
    route_q.io.enq.valid := req_helper.fire(route_q.io.enq.ready)

    req_arb.io.out.ready := req_helper.fire(req_arb.io.out.valid)

    // Steer each response to the port recorded at grant time.
    val resp_sel = route_q.io.deq.bits
    val resp_ready = io.in_resp(resp_sel).ready
    val resp_helper = DecoupledHelper(
      resp_ready,
      route_q.io.deq.valid,
      io.out_resp.valid)

    val resp_valid = resp_helper.fire(resp_ready)
    for (i <- 0 until n) {
      io.in_resp(i).bits := io.out_resp.bits
      io.in_resp(i).valid := resp_valid && resp_sel === UInt(i)
    }

    route_q.io.deq.ready := resp_helper.fire(route_q.io.deq.valid)
    io.out_resp.ready := resp_helper.fire(io.out_resp.valid)
  } else {
    // Single client: wire straight through.
    io.out_req <> io.in_req.head
    io.in_resp.head <> io.out_resp
  }
}
|
||||
82
src/main/scala/rocket/breakpoint.scala
Normal file
82
src/main/scala/rocket/breakpoint.scala
Normal file
@@ -0,0 +1,82 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import Util._
|
||||
import cde.Parameters
|
||||
|
||||
/** Layout of the tdrselect CSR: a mode bit, reserved padding, and the
  * index of the currently selected trigger/debug register.
  */
class TDRSelect(implicit p: Parameters) extends CoreBundle()(p) {
  val tdrmode = Bool()
  val reserved = UInt(width = xLen - 1 - log2Up(nTDR))
  val tdrindex = UInt(width = log2Up(nTDR))

  // One trigger register per configured breakpoint.
  def nTDR = p(NBreakpoints)
}
|
||||
|
||||
/** Control word for one hardware breakpoint: match kind, action, the
  * privilege modes it is enabled in, and the r/w/x access types it traps.
  */
class BPControl(implicit p: Parameters) extends CoreBundle()(p) {
  val tdrtype = UInt(width = 4)
  val bpamaskmax = UInt(width = 5)
  val reserved = UInt(width = xLen-28)
  val bpaction = UInt(width = 8)
  val bpmatch = UInt(width = 4)
  val m = Bool()
  val h = Bool()
  val s = Bool()
  val u = Bool()
  val r = Bool()
  val w = Bool()
  val x = Bool()

  def tdrType = 1
  def bpaMaskMax = 4
  // Enabled in privilege mode prv iff the corresponding mode bit is set.
  def enabled(mstatus: MStatus) = Cat(m, h, s, u)(mstatus.prv)
}
|
||||
|
||||
/** A single breakpoint: its control word plus the address to match. */
class BP(implicit p: Parameters) extends CoreBundle()(p) {
  val control = new BPControl
  val address = UInt(width = vaddrBits)

  // Builds the low-order don't-care mask used for power-of-two range
  // matching: each successive mask bit is set only while the previous
  // mask bit and the corresponding low address bit are both set.
  def mask(dummy: Int = 0) = {
    var mask: UInt = control.bpmatch(1)
    for (i <- 1 until control.bpaMaskMax)
      mask = Cat(mask(i-1) && address(i-1), mask)
    mask
  }

  // True when x equals address, ignoring the masked-off low bits.
  def pow2AddressMatch(x: UInt) =
    (~x | mask()) === (~address | mask())
}
|
||||
|
||||
/** Compares the fetch PC and the load/store effective address against all
  * configured breakpoints and raises the matching exception requests.
  */
class BreakpointUnit(implicit p: Parameters) extends CoreModule()(p) {
  val io = new Bundle {
    val status = new MStatus().asInput
    val bp = Vec(p(NBreakpoints), new BP).asInput
    val pc = UInt(INPUT, vaddrBits)
    val ea = UInt(INPUT, vaddrBits)
    val xcpt_if = Bool(OUTPUT)
    val xcpt_ld = Bool(OUTPUT)
    val xcpt_st = Bool(OUTPUT)
  }

  // Default: no exception requested.
  io.xcpt_if := false
  io.xcpt_ld := false
  io.xcpt_st := false

  // Exact / power-of-two matching: every enabled breakpoint checks the
  // fetch PC for execute and the effective address for load/store.
  for (bp <- io.bp) {
    when (bp.control.enabled(io.status)) {
      when (bp.pow2AddressMatch(io.pc) && bp.control.x) { io.xcpt_if := true }
      when (bp.pow2AddressMatch(io.ea) && bp.control.r) { io.xcpt_ld := true }
      when (bp.pow2AddressMatch(io.ea) && bp.control.w) { io.xcpt_st := true }
    }
  }

  // Range matching (bpmatch === 1): each adjacent breakpoint pair forms a
  // half-open [low.address, high.address) range governed by the upper
  // breakpoint's enables. Guard against an empty vector before tail/zip.
  if (!io.bp.isEmpty) for ((bpl, bph) <- io.bp zip io.bp.tail) {
    def matches(x: UInt) = !(x < bpl.address) && x < bph.address
    when (bph.control.enabled(io.status) && bph.control.bpmatch === 1) {
      when (matches(io.pc) && bph.control.x) { io.xcpt_if := true }
      when (matches(io.ea) && bph.control.r) { io.xcpt_ld := true }
      when (matches(io.ea) && bph.control.w) { io.xcpt_st := true }
    }
  }
}
|
||||
269
src/main/scala/rocket/btb.scala
Normal file
269
src/main/scala/rocket/btb.scala
Normal file
@@ -0,0 +1,269 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import cde.{Parameters, Field}
|
||||
import Util._
|
||||
import uncore.util._
|
||||
|
||||
/** Configuration key for the branch target buffer. */
case object BtbKey extends Field[BtbParameters]

/** BTB sizing knobs: entry count, return-address-stack depth, and whether
  * updates may arrive out of order (requiring a CAM port on updates).
  */
case class BtbParameters(
  nEntries: Int = 62,
  nRAS: Int = 2,
  updatesOutOfOrder: Boolean = false)
|
||||
|
||||
/** Derived BTB sizing constants, computed from [[BtbKey]]. */
abstract trait HasBtbParameters extends HasCoreParameters {
  val matchBits = pgIdxBits
  val entries = p(BtbKey).nEntries
  val nRAS = p(BtbKey).nRAS
  val updatesOutOfOrder = p(BtbKey).updatesOutOfOrder
  val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages
  val opaqueBits = log2Up(entries)
  val nBHT = 1 << log2Up(entries*2)
}
|
||||
|
||||
/** Base classes that mix the BTB parameters into modules and bundles. */
abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters
abstract class BtbBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasBtbParameters
|
||||
|
||||
/** Return address stack: a small circular stack of predicted return
  * targets with a saturating occupancy counter.
  */
class RAS(nras: Int) {
  /** Push a return address; occupancy saturates at nras. */
  def push(addr: UInt): Unit = {
    when (count < nras) { count := count + 1 }
    // Wrap the write pointer; power-of-two depths wrap for free.
    val nextPos = Mux(Bool(isPow2(nras)) || pos < nras-1, pos+1, UInt(0))
    stack(nextPos) := addr
    pos := nextPos
  }
  /** The most recently pushed address. */
  def peek: UInt = stack(pos)
  /** Discard the top entry; no-op when empty. */
  def pop(): Unit = when (!isEmpty) {
    count := count - 1
    pos := Mux(Bool(isPow2(nras)) || pos > 0, pos-1, UInt(nras-1))
  }
  def clear(): Unit = count := UInt(0)
  def isEmpty: Bool = count === UInt(0)

  private val count = Reg(UInt(width = log2Up(nras+1)))
  private val pos = Reg(UInt(width = log2Up(nras)))
  private val stack = Reg(Vec(nras, UInt()))
}
|
||||
|
||||
/** BHT prediction response: the global history used for the lookup and
  * the 2-bit counter value read from the table.
  */
class BHTResp(implicit p: Parameters) extends BtbBundle()(p) {
  val history = UInt(width = log2Up(nBHT).max(1))
  val value = UInt(width = 2)
}
|
||||
|
||||
// BHT contains a table of 2-bit counters and a global history register.
// The BHT only predicts and updates when there is a BTB hit.
// The global history is updated speculatively in fetch (if there's a BTB
// hit) and reset from the captured history on a mispredict.
// The counter table is indexed by the fetch PC xor'd with the history;
// the updating branch must provide its "fetch pc".
class BHT(nbht: Int)(implicit val p: Parameters) extends HasCoreParameters {
  val nbhtbits = log2Up(nbht)

  /** Look up a prediction for addr; when update is set, speculatively
    * shift the predicted direction into the global history.
    */
  def get(addr: UInt, update: Bool): BHTResp = {
    val res = Wire(new BHTResp)
    val index = addr(nbhtbits+1, log2Up(coreInstBytes)) ^ history
    res.value := table(index)
    res.history := history
    val taken = res.value(0)
    when (update) { history := Cat(taken, history(nbhtbits-1,1)) }
    res
  }

  /** Train the counter (and repair history on mispredict) at resolution,
    * using the history captured in the original prediction d.
    */
  def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = {
    val index = addr(nbhtbits+1, log2Up(coreInstBytes)) ^ d.history
    // Saturating 2-bit counter: new MSB = taken; LSB saturates toward it.
    table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken))
    when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) }
  }

  private val table = Mem(nbht, UInt(width = 2))
  val history = Reg(UInt(width = nbhtbits))
}
|
||||
|
||||
// BTB update occurs during branch resolution (and only on a mispredict).
// - "pc" is what future fetch PCs will tag match against.
// - "br_pc" is the PC of the branch instruction itself.
class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) {
  val prediction = Valid(new BTBResp)
  val pc = UInt(width = vaddrBits)
  val target = UInt(width = vaddrBits)
  val taken = Bool()
  val isValid = Bool()
  val isJump = Bool()
  val isReturn = Bool()
  val br_pc = UInt(width = vaddrBits)
}
|
||||
|
||||
// BHT update occurs during branch resolution on all conditional branches.
// - "pc" is what future fetch PCs will tag match against.
class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) {
  val prediction = Valid(new BTBResp)
  val pc = UInt(width = vaddrBits)
  val taken = Bool()
  val mispredict = Bool()
}
|
||||
|
||||
/** Return-address-stack update: push on calls, pop on predicted returns. */
class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) {
  val isCall = Bool()
  val isReturn = Bool()
  val returnAddr = UInt(width = vaddrBits)
  val prediction = Valid(new BTBResp)
}
|
||||
|
||||
// - "bridx" is the low-order PC bits of the predicted branch (after
//   shifting off the lowest log(inst_bytes) bits).
// - "mask" provides a mask of valid instructions (instructions are
//   masked off by the predicted taken branch from the BTB).
class BTBResp(implicit p: Parameters) extends BtbBundle()(p) {
  val taken = Bool()
  val mask = Bits(width = fetchWidth)
  val bridx = Bits(width = log2Up(fetchWidth))
  val target = UInt(width = vaddrBits)
  val entry = UInt(width = opaqueBits)
  val bht = new BHTResp
}
|
||||
|
||||
/** BTB lookup request: the fetch address to predict from. */
class BTBReq(implicit p: Parameters) extends BtbBundle()(p) {
  val addr = UInt(width = vaddrBits)
}
|
||||
|
||||
// Fully-associative branch target buffer.
// Higher-performance processors may cause BTB updates to occur
// out-of-order, which requires an extra CAM port for updates (to ensure
// no duplicates get placed in the BTB).
class BTB(implicit p: Parameters) extends BtbModule {
  val io = new Bundle {
    val req = Valid(new BTBReq).flip
    val resp = Valid(new BTBResp)
    val btb_update = Valid(new BTBUpdate).flip
    val bht_update = Valid(new BHTUpdate).flip
    val ras_update = Valid(new RASUpdate).flip
  }

  // Entry storage: per-entry index/target offsets within a page, plus
  // pointers into a small shared table of page numbers.
  val idxs = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
  val idxPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
  val tgts = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes))))
  val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages))))
  val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits)))
  val pageValid = Reg(init = UInt(0, nPages))
  val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0))
  val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0))

  // Per-entry attribute bitmaps.
  val isValid = Reg(init = UInt(0, entries))
  val isReturn = Reg(UInt(width = entries))
  val isJump = Reg(UInt(width = entries))
  val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth))))

  private def page(addr: UInt) = addr >> matchBits
  private def pageMatch(addr: UInt) = {
    val p = page(addr)
    pageValid & pages.map(_ === p).asUInt
  }
  // One-hot vector of valid entries whose index and page both match addr.
  private def tagMatch(addr: UInt, pgMatch: UInt) = {
    val idxMatch = idxs.map(_ === addr(matchBits-1, log2Up(coreInstBytes))).asUInt
    val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).asUInt
    idxMatch & idxPageMatch & isValid
  }

  // Updates are registered one cycle; the new target is the next fetch PC.
  val r_btb_update = Pipe(io.btb_update)
  val update_target = io.req.bits.addr

  val pageHit = pageMatch(io.req.bits.addr)
  val hitsVec = tagMatch(io.req.bits.addr, pageHit)
  val hits = hitsVec.asUInt
  val updatePageHit = pageMatch(r_btb_update.bits.pc)

  // Out-of-order updates CAM for an existing entry; in-order updates can
  // trust the entry recorded in the original prediction.
  val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit)
  val updateHit = if (updatesOutOfOrder) updateHits.orR else r_btb_update.bits.prediction.valid
  val updateHitAddr = if (updatesOutOfOrder) OHToUInt(updateHits) else r_btb_update.bits.prediction.bits.entry
  val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1

  // Select which page-table slots to (re)use for the index and target.
  val useUpdatePageHit = updatePageHit.orR
  val usePageHit = pageHit.orR
  val doIdxPageRepl = !useUpdatePageHit
  val nextPageRepl = Reg(UInt(width = log2Ceil(nPages)))
  val idxPageRepl = Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), UIntToOH(nextPageRepl))
  val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl)
  val idxPageUpdate = OHToUInt(idxPageUpdateOH)
  val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0))

  val samePage = page(r_btb_update.bits.pc) === page(update_target)
  val doTgtPageRepl = !samePage && !usePageHit
  val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1)))
  val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl))
  val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0))

  // Advance the page replacement pointer by however many pages we used.
  when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) {
    val both = doIdxPageRepl && doTgtPageRepl
    val next = nextPageRepl + Mux[UInt](both, 2, 1)
    nextPageRepl := Mux(next >= nPages, next(0), next)
  }

  when (r_btb_update.valid) {
    // Overwrite the matching entry, or the replacement victim otherwise.
    val waddr = Mux(updateHit, updateHitAddr, nextRepl)
    val mask = UIntToOH(waddr)
    idxs(waddr) := r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes))
    tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes))
    idxPages(waddr) := idxPageUpdate
    tgtPages(waddr) := tgtPageUpdate
    isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask)
    isReturn := Mux(r_btb_update.bits.isReturn, isReturn | mask, isReturn & ~mask)
    isJump := Mux(r_btb_update.bits.isJump, isJump | mask, isJump & ~mask)
    if (fetchWidth > 1)
      brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes)

    require(nPages % 2 == 0)
    val idxWritesEven = !idxPageUpdate(0)

    def writeBank(i: Int, mod: Int, en: UInt, data: UInt) =
      for (i <- i until nPages by mod)
        when (en(i)) { pages(i) := data }

    // The page table is split into even/odd banks so the index page and
    // target page can be written in the same cycle.
    writeBank(0, 2, Mux(idxWritesEven, idxPageReplEn, tgtPageReplEn),
      Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target)))
    writeBank(1, 2, Mux(idxWritesEven, tgtPageReplEn, idxPageReplEn),
      Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc)))
    pageValid := pageValid | tgtPageReplEn | idxPageReplEn
  }

  // Prediction outputs: reassemble the target from its page and offset.
  io.resp.valid := hits.orR
  io.resp.bits.taken := true
  io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts) << log2Up(coreInstBytes))
  io.resp.bits.entry := OHToUInt(hits)
  io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(hitsVec, brIdx) else UInt(0))
  io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))

  if (nBHT > 0) {
    val bht = new BHT(nBHT)
    val isBranch = !(hits & isJump).orR
    val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
    // Only train the BHT when the original prediction came from a BTB hit.
    val update_btb_hit = io.bht_update.bits.prediction.valid
    when (io.bht_update.valid && update_btb_hit) {
      bht.update(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken, io.bht_update.bits.mispredict)
    }
    // A not-taken BHT prediction overrides the BTB for conditional branches.
    when (!res.value(0) && isBranch) { io.resp.bits.taken := false }
    io.resp.bits.bht := res
  }

  if (nRAS > 0) {
    val ras = new RAS(nRAS)
    val doPeek = (hits & isReturn).orR
    // Predicted returns take their target from the RAS instead.
    when (!ras.isEmpty && doPeek) {
      io.resp.bits.target := ras.peek
    }
    when (io.ras_update.valid) {
      when (io.ras_update.bits.isCall) {
        ras.push(io.ras_update.bits.returnAddr)
        // Bypass a same-cycle push to a concurrently predicted return.
        when (doPeek) {
          io.resp.bits.target := io.ras_update.bits.returnAddr
        }
      }.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) {
        ras.pop()
      }
    }
  }
}
|
||||
61
src/main/scala/rocket/consts.scala
Normal file
61
src/main/scala/rocket/consts.scala
Normal file
@@ -0,0 +1,61 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
package constants
|
||||
|
||||
import Chisel._
|
||||
import scala.math._
|
||||
|
||||
/** Encodings shared by the scalar datapath: memory operation types,
  * branch conditions, operand and immediate selects, and operation width.
  */
trait ScalarOpConstants {
  // Memory operation type: MSB = unsigned, low bits = log2(size in bytes).
  val MT_SZ = 3
  val MT_X  = BitPat("b???")
  val MT_B  = UInt("b000")
  val MT_H  = UInt("b001")
  val MT_W  = UInt("b010")
  val MT_D  = UInt("b011")
  val MT_BU = UInt("b100")
  val MT_HU = UInt("b101")
  val MT_WU = UInt("b110")
  def mtSize(mt: UInt) = mt(MT_SZ-2, 0)
  def mtSigned(mt: UInt) = !mt(MT_SZ-1)

  // Branch condition select.
  val SZ_BR = 3
  val BR_X   = BitPat("b???")
  val BR_EQ  = UInt(0, 3)
  val BR_NE  = UInt(1, 3)
  val BR_J   = UInt(2, 3)
  val BR_N   = UInt(3, 3)
  val BR_LT  = UInt(4, 3)
  val BR_GE  = UInt(5, 3)
  val BR_LTU = UInt(6, 3)
  val BR_GEU = UInt(7, 3)

  // ALU operand 1 select.
  val A1_X    = BitPat("b??")
  val A1_ZERO = UInt(0, 2)
  val A1_RS1  = UInt(1, 2)
  val A1_PC   = UInt(2, 2)

  // Immediate format select.
  val IMM_X  = BitPat("b???")
  val IMM_S  = UInt(0, 3)
  val IMM_SB = UInt(1, 3)
  val IMM_U  = UInt(2, 3)
  val IMM_UJ = UInt(3, 3)
  val IMM_I  = UInt(4, 3)
  val IMM_Z  = UInt(5, 3)

  // ALU operand 2 select.
  val A2_X    = BitPat("b??")
  val A2_ZERO = UInt(0, 2)
  val A2_SIZE = UInt(1, 2)
  val A2_RS2  = UInt(2, 2)
  val A2_IMM  = UInt(3, 2)

  // Generic single-bit don't-care / no / yes.
  val X = BitPat("b?")
  val N = BitPat("b0")
  val Y = BitPat("b1")

  // Operation width select (32- vs 64-bit).
  val SZ_DW = 1
  val DW_X   = X
  val DW_32  = Bool(false)
  val DW_64  = Bool(true)
  val DW_XPR = DW_64
}
|
||||
589
src/main/scala/rocket/csr.scala
Normal file
589
src/main/scala/rocket/csr.scala
Normal file
@@ -0,0 +1,589 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import Util._
|
||||
import Instructions._
|
||||
import cde.{Parameters, Field}
|
||||
import uncore.devices._
|
||||
import uncore.util._
|
||||
import junctions.AddrMap
|
||||
|
||||
/** Field layout of the mstatus CSR, MSB first. */
class MStatus extends Bundle {
  val debug = Bool() // not truly part of mstatus, but convenient
  val prv = UInt(width = PRV.SZ) // not truly part of mstatus, but convenient
  val sd = Bool()
  val zero3 = UInt(width = 31)
  val sd_rv32 = Bool()
  val zero2 = UInt(width = 2)
  val vm = UInt(width = 5)
  val zero1 = UInt(width = 4)
  val mxr = Bool()
  val pum = Bool()
  val mprv = Bool()
  val xs = UInt(width = 2)
  val fs = UInt(width = 2)
  val mpp = UInt(width = 2)
  val hpp = UInt(width = 2)
  val spp = UInt(width = 1)
  val mpie = Bool()
  val hpie = Bool()
  val spie = Bool()
  val upie = Bool()
  val mie = Bool()
  val hie = Bool()
  val sie = Bool()
  val uie = Bool()
}
|
||||
|
||||
/** Field layout of the debug control and status register (dcsr). */
class DCSR extends Bundle {
  val xdebugver = UInt(width = 2)
  val ndreset = Bool()
  val fullreset = Bool()
  val hwbpcount = UInt(width = 12)
  val ebreakm = Bool()
  val ebreakh = Bool()
  val ebreaks = Bool()
  val ebreaku = Bool()
  val zero2 = Bool()
  val stopcycle = Bool()
  val stoptime = Bool()
  val cause = UInt(width = 3)
  val debugint = Bool()
  val zero1 = Bool()
  val halt = Bool()
  val step = Bool()
  val prv = UInt(width = PRV.SZ)
}
|
||||
|
||||
/** Field layout of the machine interrupt-pending (mip) register,
  * with an extra top bit for the RoCC accelerator interrupt.
  */
class MIP extends Bundle {
  val rocc = Bool()
  val meip = Bool()
  val heip = Bool()
  val seip = Bool()
  val ueip = Bool()
  val mtip = Bool()
  val htip = Bool()
  val stip = Bool()
  val utip = Bool()
  val msip = Bool()
  val hsip = Bool()
  val ssip = Bool()
  val usip = Bool()
}
|
||||
|
||||
/** Page-table base register: address-space ID plus root physical page number. */
class PTBR(implicit p: Parameters) extends CoreBundle()(p) {
  // Both fields must fit in a single XLEN-wide CSR.
  require(maxPAddrBits - pgIdxBits + asIdBits <= xLen)
  val asid = UInt(width = asIdBits)
  val ppn = UInt(width = maxPAddrBits - pgIdxBits)
}
|
||||
|
||||
/** Privilege-level encodings. */
object PRV {
  val SZ = 2
  val U = 0
  val S = 1
  val H = 2
  val M = 3
}
|
||||
|
||||
/** CSR access command encodings and the CSR address width. */
object CSR {
  // commands
  val SZ = 3
  val X = BitPat.dontCare(SZ)
  val N = UInt(0,SZ)
  val W = UInt(1,SZ)
  val S = UInt(2,SZ)
  val C = UInt(3,SZ)
  val I = UInt(4,SZ)
  val R = UInt(5,SZ)

  val ADDRSZ = 12
}
|
||||
|
||||
/** I/O bundle for the CSR file: the read/write port, exception and
  * interrupt plumbing, and status outputs consumed by the rest of the core.
  */
class CSRFileIO(implicit p: Parameters) extends CoreBundle {
  val prci = new PRCITileIO().flip
  // CSR read/write port.
  val rw = new Bundle {
    val addr = UInt(INPUT, CSR.ADDRSZ)
    val cmd = Bits(INPUT, CSR.SZ)
    val rdata = Bits(OUTPUT, xLen)
    val wdata = Bits(INPUT, xLen)
  }

  val csr_stall = Bool(OUTPUT)
  val csr_xcpt = Bool(OUTPUT)
  val eret = Bool(OUTPUT)
  val singleStep = Bool(OUTPUT)

  val status = new MStatus().asOutput
  val ptbr = new PTBR().asOutput
  val evec = UInt(OUTPUT, vaddrBitsExtended)
  val exception = Bool(INPUT)
  val retire = UInt(INPUT, log2Up(1+retireWidth))
  val custom_mrw_csrs = Vec(nCustomMrwCsrs, UInt(INPUT, xLen))
  val cause = UInt(INPUT, xLen)
  val pc = UInt(INPUT, vaddrBitsExtended)
  val badaddr = UInt(INPUT, vaddrBitsExtended)
  val fatc = Bool(OUTPUT)
  val time = UInt(OUTPUT, xLen)
  val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ)
  val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip
  val rocc = new RoCCInterface().flip
  val interrupt = Bool(OUTPUT)
  val interrupt_cause = UInt(OUTPUT, xLen)
  val bp = Vec(p(NBreakpoints), new BP).asOutput
}
|
||||
|
||||
class CSRFile(implicit p: Parameters) extends CoreModule()(p)
|
||||
{
|
||||
val io = new CSRFileIO
|
||||
|
||||
val reset_mstatus = Wire(init=new MStatus().fromBits(0))
|
||||
reset_mstatus.mpp := PRV.M
|
||||
reset_mstatus.prv := PRV.M
|
||||
val reg_mstatus = Reg(init=reset_mstatus)
|
||||
|
||||
val new_prv = Wire(init = reg_mstatus.prv)
|
||||
reg_mstatus.prv := legalizePrivilege(new_prv)
|
||||
|
||||
val reset_dcsr = Wire(init=new DCSR().fromBits(0))
|
||||
reset_dcsr.xdebugver := 1
|
||||
reset_dcsr.prv := PRV.M
|
||||
val reg_dcsr = Reg(init=reset_dcsr)
|
||||
|
||||
val (supported_interrupts, delegable_interrupts) = {
|
||||
val sup = Wire(init=new MIP().fromBits(0))
|
||||
sup.ssip := Bool(p(UseVM))
|
||||
sup.msip := true
|
||||
sup.stip := Bool(p(UseVM))
|
||||
sup.mtip := true
|
||||
sup.meip := true
|
||||
sup.seip := Bool(p(UseVM))
|
||||
sup.rocc := usingRoCC
|
||||
|
||||
val del = Wire(init=sup)
|
||||
del.msip := false
|
||||
del.mtip := false
|
||||
del.meip := false
|
||||
|
||||
(sup.asUInt, del.asUInt)
|
||||
}
|
||||
val delegable_exceptions = UInt(Seq(
|
||||
Causes.misaligned_fetch,
|
||||
Causes.fault_fetch,
|
||||
Causes.breakpoint,
|
||||
Causes.fault_load,
|
||||
Causes.fault_store,
|
||||
Causes.user_ecall).map(1 << _).sum)
|
||||
|
||||
val exception = io.exception || io.csr_xcpt
|
||||
val reg_debug = Reg(init=Bool(false))
|
||||
val reg_dpc = Reg(UInt(width = vaddrBitsExtended))
|
||||
val reg_dscratch = Reg(UInt(width = xLen))
|
||||
|
||||
val reg_singleStepped = Reg(Bool())
|
||||
when (io.retire(0) || exception) { reg_singleStepped := true }
|
||||
when (!io.singleStep) { reg_singleStepped := false }
|
||||
assert(!io.singleStep || io.retire <= UInt(1))
|
||||
assert(!reg_singleStepped || io.retire === UInt(0))
|
||||
|
||||
val reg_tdrselect = Reg(new TDRSelect)
|
||||
val reg_bp = Reg(Vec(1 << log2Up(p(NBreakpoints)), new BP))
|
||||
|
||||
val reg_mie = Reg(init=UInt(0, xLen))
|
||||
val reg_mideleg = Reg(init=UInt(0, xLen))
|
||||
val reg_medeleg = Reg(init=UInt(0, xLen))
|
||||
val reg_mip = Reg(new MIP)
|
||||
val reg_mepc = Reg(UInt(width = vaddrBitsExtended))
|
||||
val reg_mcause = Reg(Bits(width = xLen))
|
||||
val reg_mbadaddr = Reg(UInt(width = vaddrBitsExtended))
|
||||
val reg_mscratch = Reg(Bits(width = xLen))
|
||||
val reg_mtvec = Reg(init=UInt(p(MtvecInit), paddrBits min xLen))
|
||||
|
||||
val reg_sepc = Reg(UInt(width = vaddrBitsExtended))
|
||||
val reg_scause = Reg(Bits(width = xLen))
|
||||
val reg_sbadaddr = Reg(UInt(width = vaddrBitsExtended))
|
||||
val reg_sscratch = Reg(Bits(width = xLen))
|
||||
val reg_stvec = Reg(UInt(width = vaddrBits))
|
||||
val reg_sptbr = Reg(new PTBR)
|
||||
val reg_wfi = Reg(init=Bool(false))
|
||||
|
||||
val reg_fflags = Reg(UInt(width = 5))
|
||||
val reg_frm = Reg(UInt(width = 3))
|
||||
|
||||
val reg_instret = WideCounter(64, io.retire)
|
||||
val reg_cycle: UInt = if (enableCommitLog) { reg_instret } else { WideCounter(64) }
|
||||
|
||||
val mip = Wire(init=reg_mip)
|
||||
mip.rocc := io.rocc.interrupt
|
||||
val read_mip = mip.asUInt & supported_interrupts
|
||||
|
||||
val pending_interrupts = read_mip & reg_mie
|
||||
val m_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.M || (reg_mstatus.prv === PRV.M && reg_mstatus.mie)), pending_interrupts & ~reg_mideleg, UInt(0))
|
||||
val s_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.S || (reg_mstatus.prv === PRV.S && reg_mstatus.sie)), pending_interrupts & reg_mideleg, UInt(0))
|
||||
val all_interrupts = m_interrupts | s_interrupts
|
||||
val interruptMSB = BigInt(1) << (xLen-1)
|
||||
val interruptCause = interruptMSB + PriorityEncoder(all_interrupts)
|
||||
io.interrupt := all_interrupts.orR && !io.singleStep || reg_singleStepped
|
||||
io.interrupt_cause := interruptCause
|
||||
io.bp := reg_bp take p(NBreakpoints)
|
||||
|
||||
val debugIntCause = reg_mip.getWidth
|
||||
// debug interrupts are only masked by being in debug mode
|
||||
when (Bool(usingDebug) && reg_dcsr.debugint && !reg_debug) {
|
||||
io.interrupt := true
|
||||
io.interrupt_cause := interruptMSB + debugIntCause
|
||||
}
|
||||
|
||||
val system_insn = io.rw.cmd === CSR.I
|
||||
val cpu_ren = io.rw.cmd =/= CSR.N && !system_insn
|
||||
|
||||
val isa_string = "IM" +
|
||||
(if (usingVM) "S" else "") +
|
||||
(if (usingUser) "U" else "") +
|
||||
(if (usingAtomics) "A" else "") +
|
||||
(if (usingFPU) "FD" else "") +
|
||||
(if (usingRoCC) "X" else "")
|
||||
val isa = (BigInt(log2Ceil(xLen) - 4) << (xLen-2)) |
|
||||
isa_string.map(x => 1 << (x - 'A')).reduce(_|_)
|
||||
val read_mstatus = io.status.asUInt()(xLen-1,0)
|
||||
|
||||
val read_mapping = collection.mutable.LinkedHashMap[Int,Bits](
|
||||
CSRs.tdrselect -> reg_tdrselect.asUInt,
|
||||
CSRs.tdrdata1 -> reg_bp(reg_tdrselect.tdrindex).control.asUInt,
|
||||
CSRs.tdrdata2 -> reg_bp(reg_tdrselect.tdrindex).address,
|
||||
CSRs.mimpid -> UInt(0),
|
||||
CSRs.marchid -> UInt(0),
|
||||
CSRs.mvendorid -> UInt(0),
|
||||
CSRs.mcycle -> reg_cycle,
|
||||
CSRs.minstret -> reg_instret,
|
||||
CSRs.mucounteren -> UInt(0),
|
||||
CSRs.mutime_delta -> UInt(0),
|
||||
CSRs.mucycle_delta -> UInt(0),
|
||||
CSRs.muinstret_delta -> UInt(0),
|
||||
CSRs.misa -> UInt(isa),
|
||||
CSRs.mstatus -> read_mstatus,
|
||||
CSRs.mtvec -> reg_mtvec,
|
||||
CSRs.mip -> read_mip,
|
||||
CSRs.mie -> reg_mie,
|
||||
CSRs.mideleg -> reg_mideleg,
|
||||
CSRs.medeleg -> reg_medeleg,
|
||||
CSRs.mscratch -> reg_mscratch,
|
||||
CSRs.mepc -> reg_mepc.sextTo(xLen),
|
||||
CSRs.mbadaddr -> reg_mbadaddr.sextTo(xLen),
|
||||
CSRs.mcause -> reg_mcause,
|
||||
CSRs.mhartid -> io.prci.id)
|
||||
|
||||
if (usingDebug) {
|
||||
read_mapping += CSRs.dcsr -> reg_dcsr.asUInt
|
||||
read_mapping += CSRs.dpc -> reg_dpc.asUInt
|
||||
read_mapping += CSRs.dscratch -> reg_dscratch.asUInt
|
||||
}
|
||||
|
||||
if (usingFPU) {
|
||||
read_mapping += CSRs.fflags -> reg_fflags
|
||||
read_mapping += CSRs.frm -> reg_frm
|
||||
read_mapping += CSRs.fcsr -> Cat(reg_frm, reg_fflags)
|
||||
}
|
||||
|
||||
if (usingVM) {
|
||||
val read_sie = reg_mie & reg_mideleg
|
||||
val read_sip = read_mip & reg_mideleg
|
||||
val read_sstatus = Wire(init=io.status)
|
||||
read_sstatus.vm := 0
|
||||
read_sstatus.mprv := 0
|
||||
read_sstatus.mpp := 0
|
||||
read_sstatus.hpp := 0
|
||||
read_sstatus.mpie := 0
|
||||
read_sstatus.hpie := 0
|
||||
read_sstatus.mie := 0
|
||||
read_sstatus.hie := 0
|
||||
|
||||
read_mapping += CSRs.sstatus -> (read_sstatus.asUInt())(xLen-1,0)
|
||||
read_mapping += CSRs.sip -> read_sip.asUInt
|
||||
read_mapping += CSRs.sie -> read_sie.asUInt
|
||||
read_mapping += CSRs.sscratch -> reg_sscratch
|
||||
read_mapping += CSRs.scause -> reg_scause
|
||||
read_mapping += CSRs.sbadaddr -> reg_sbadaddr.sextTo(xLen)
|
||||
read_mapping += CSRs.sptbr -> reg_sptbr.asUInt
|
||||
read_mapping += CSRs.sepc -> reg_sepc.sextTo(xLen)
|
||||
read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen)
|
||||
read_mapping += CSRs.mscounteren -> UInt(0)
|
||||
read_mapping += CSRs.mstime_delta -> UInt(0)
|
||||
read_mapping += CSRs.mscycle_delta -> UInt(0)
|
||||
read_mapping += CSRs.msinstret_delta -> UInt(0)
|
||||
}
|
||||
|
||||
if (xLen == 32) {
|
||||
read_mapping += CSRs.mcycleh -> (reg_cycle >> 32)
|
||||
read_mapping += CSRs.minstreth -> (reg_instret >> 32)
|
||||
read_mapping += CSRs.mutime_deltah -> UInt(0)
|
||||
read_mapping += CSRs.mucycle_deltah -> UInt(0)
|
||||
read_mapping += CSRs.muinstret_deltah -> UInt(0)
|
||||
if (usingVM) {
|
||||
read_mapping += CSRs.mstime_deltah -> UInt(0)
|
||||
read_mapping += CSRs.mscycle_deltah -> UInt(0)
|
||||
read_mapping += CSRs.msinstret_deltah -> UInt(0)
|
||||
}
|
||||
}
|
||||
|
||||
for (i <- 0 until nCustomMrwCsrs) {
|
||||
val addr = 0xff0 + i
|
||||
require(addr < (1 << CSR.ADDRSZ))
|
||||
require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use")
|
||||
read_mapping += addr -> io.custom_mrw_csrs(i)
|
||||
}
|
||||
|
||||
val decoded_addr = read_mapping map { case (k, v) => k -> (io.rw.addr === k) }
|
||||
|
||||
val addr_valid = decoded_addr.values.reduce(_||_)
|
||||
val fp_csr =
|
||||
if (usingFPU) decoded_addr(CSRs.fflags) || decoded_addr(CSRs.frm) || decoded_addr(CSRs.fcsr)
|
||||
else Bool(false)
|
||||
val csr_debug = Bool(usingDebug) && io.rw.addr(5)
|
||||
val csr_addr_priv = Cat(io.rw.addr(6,5).andR, io.rw.addr(9,8))
|
||||
val priv_sufficient = Cat(reg_debug, reg_mstatus.prv) >= csr_addr_priv
|
||||
val read_only = io.rw.addr(11,10).andR
|
||||
val cpu_wen = cpu_ren && io.rw.cmd =/= CSR.R && priv_sufficient
|
||||
val wen = cpu_wen && !read_only
|
||||
|
||||
val wdata = (Mux(io.rw.cmd.isOneOf(CSR.S, CSR.C), io.rw.rdata, UInt(0)) |
|
||||
Mux(io.rw.cmd =/= CSR.C, io.rw.wdata, UInt(0))) &
|
||||
~Mux(io.rw.cmd === CSR.C, io.rw.wdata, UInt(0))
|
||||
|
||||
val do_system_insn = priv_sufficient && system_insn
|
||||
val opcode = UInt(1) << io.rw.addr(2,0)
|
||||
val insn_call = do_system_insn && opcode(0)
|
||||
val insn_break = do_system_insn && opcode(1)
|
||||
val insn_ret = do_system_insn && opcode(2)
|
||||
val insn_sfence_vm = do_system_insn && opcode(4)
|
||||
val insn_wfi = do_system_insn && opcode(5)
|
||||
|
||||
io.csr_xcpt := (cpu_wen && read_only) ||
|
||||
(cpu_ren && (!priv_sufficient || !addr_valid || fp_csr && !io.status.fs.orR)) ||
|
||||
(system_insn && !priv_sufficient) ||
|
||||
insn_call || insn_break
|
||||
|
||||
when (insn_wfi) { reg_wfi := true }
|
||||
when (pending_interrupts.orR) { reg_wfi := false }
|
||||
|
||||
val cause =
|
||||
Mux(!io.csr_xcpt, io.cause,
|
||||
Mux(insn_call, reg_mstatus.prv + Causes.user_ecall,
|
||||
Mux[UInt](insn_break, Causes.breakpoint, Causes.illegal_instruction)))
|
||||
val cause_lsbs = cause(log2Up(xLen)-1,0)
|
||||
val causeIsDebugInt = cause(xLen-1) && cause_lsbs === debugIntCause
|
||||
val causeIsDebugBreak = cause === Causes.breakpoint && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv)
|
||||
val trapToDebug = Bool(usingDebug) && (reg_singleStepped || causeIsDebugInt || causeIsDebugBreak || reg_debug)
|
||||
val delegate = Bool(p(UseVM)) && reg_mstatus.prv < PRV.M && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs))
|
||||
val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800))
|
||||
val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec))
|
||||
val epc = Mux(csr_debug, reg_dpc, Mux(Bool(p(UseVM)) && !csr_addr_priv(1), reg_sepc, reg_mepc))
|
||||
io.fatc := insn_sfence_vm
|
||||
io.evec := Mux(exception, tvec, epc)
|
||||
io.ptbr := reg_sptbr
|
||||
io.eret := insn_ret
|
||||
io.singleStep := reg_dcsr.step && !reg_debug
|
||||
io.status := reg_mstatus
|
||||
io.status.sd := io.status.fs.andR || io.status.xs.andR
|
||||
io.status.debug := reg_debug
|
||||
if (xLen == 32)
|
||||
io.status.sd_rv32 := io.status.sd
|
||||
|
||||
when (exception) {
|
||||
val epc = ~(~io.pc | (coreInstBytes-1))
|
||||
val pie = read_mstatus(reg_mstatus.prv)
|
||||
|
||||
val write_badaddr = cause isOneOf (Causes.breakpoint,
|
||||
Causes.misaligned_load, Causes.misaligned_store, Causes.misaligned_fetch,
|
||||
Causes.fault_load, Causes.fault_store, Causes.fault_fetch)
|
||||
|
||||
when (trapToDebug) {
|
||||
reg_debug := true
|
||||
reg_dpc := epc
|
||||
reg_dcsr.cause := Mux(reg_singleStepped, UInt(4), Mux(causeIsDebugInt, UInt(3), UInt(1)))
|
||||
reg_dcsr.prv := trimPrivilege(reg_mstatus.prv)
|
||||
}.elsewhen (delegate) {
|
||||
reg_sepc := epc
|
||||
reg_scause := cause
|
||||
when (write_badaddr) { reg_sbadaddr := io.badaddr }
|
||||
reg_mstatus.spie := pie
|
||||
reg_mstatus.spp := reg_mstatus.prv
|
||||
reg_mstatus.sie := false
|
||||
new_prv := PRV.S
|
||||
}.otherwise {
|
||||
reg_mepc := epc
|
||||
reg_mcause := cause
|
||||
when (write_badaddr) { reg_mbadaddr := io.badaddr }
|
||||
reg_mstatus.mpie := pie
|
||||
reg_mstatus.mpp := trimPrivilege(reg_mstatus.prv)
|
||||
reg_mstatus.mie := false
|
||||
new_prv := PRV.M
|
||||
}
|
||||
}
|
||||
|
||||
when (insn_ret) {
|
||||
when (Bool(p(UseVM)) && !csr_addr_priv(1)) {
|
||||
when (reg_mstatus.spp.toBool) { reg_mstatus.sie := reg_mstatus.spie }
|
||||
reg_mstatus.spie := false
|
||||
reg_mstatus.spp := PRV.U
|
||||
new_prv := reg_mstatus.spp
|
||||
}.elsewhen (csr_debug) {
|
||||
new_prv := reg_dcsr.prv
|
||||
reg_debug := false
|
||||
}.otherwise {
|
||||
when (reg_mstatus.mpp(1)) { reg_mstatus.mie := reg_mstatus.mpie }
|
||||
.elsewhen (Bool(usingVM) && reg_mstatus.mpp(0)) { reg_mstatus.sie := reg_mstatus.mpie }
|
||||
reg_mstatus.mpie := false
|
||||
reg_mstatus.mpp := legalizePrivilege(PRV.U)
|
||||
new_prv := reg_mstatus.mpp
|
||||
}
|
||||
}
|
||||
|
||||
assert(PopCount(insn_ret :: io.exception :: io.csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive")
|
||||
|
||||
io.time := reg_cycle
|
||||
io.csr_stall := reg_wfi
|
||||
|
||||
io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v)
|
||||
|
||||
io.fcsr_rm := reg_frm
|
||||
when (io.fcsr_flags.valid) {
|
||||
reg_fflags := reg_fflags | io.fcsr_flags.bits
|
||||
}
|
||||
|
||||
when (wen) {
|
||||
when (decoded_addr(CSRs.mstatus)) {
|
||||
val new_mstatus = new MStatus().fromBits(wdata)
|
||||
reg_mstatus.mie := new_mstatus.mie
|
||||
reg_mstatus.mpie := new_mstatus.mpie
|
||||
|
||||
if (usingUser) {
|
||||
reg_mstatus.mprv := new_mstatus.mprv
|
||||
reg_mstatus.mpp := trimPrivilege(new_mstatus.mpp)
|
||||
if (usingVM) {
|
||||
reg_mstatus.mxr := new_mstatus.mxr
|
||||
reg_mstatus.pum := new_mstatus.pum
|
||||
reg_mstatus.spp := new_mstatus.spp
|
||||
reg_mstatus.spie := new_mstatus.spie
|
||||
reg_mstatus.sie := new_mstatus.sie
|
||||
}
|
||||
}
|
||||
|
||||
if (usingVM) {
|
||||
require(if (xLen == 32) pgLevels == 2 else pgLevels > 2 && pgLevels < 6)
|
||||
val vm_on = 6 + pgLevels // TODO Sv48 support should imply Sv39 support
|
||||
when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 }
|
||||
when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on }
|
||||
}
|
||||
if (usingVM || usingFPU) reg_mstatus.fs := Fill(2, new_mstatus.fs.orR)
|
||||
if (usingRoCC) reg_mstatus.xs := Fill(2, new_mstatus.xs.orR)
|
||||
}
|
||||
when (decoded_addr(CSRs.mip)) {
|
||||
val new_mip = new MIP().fromBits(wdata)
|
||||
if (usingVM) {
|
||||
reg_mip.ssip := new_mip.ssip
|
||||
reg_mip.stip := new_mip.stip
|
||||
}
|
||||
}
|
||||
when (decoded_addr(CSRs.mie)) { reg_mie := wdata & supported_interrupts }
|
||||
when (decoded_addr(CSRs.mepc)) { reg_mepc := ~(~wdata | (coreInstBytes-1)) }
|
||||
when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata }
|
||||
if (p(MtvecWritable))
|
||||
when (decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata >> 2 << 2 }
|
||||
when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ }
|
||||
when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) }
|
||||
if (usingFPU) {
|
||||
when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata }
|
||||
when (decoded_addr(CSRs.frm)) { reg_frm := wdata }
|
||||
when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth }
|
||||
}
|
||||
if (usingDebug) {
|
||||
when (decoded_addr(CSRs.dcsr)) {
|
||||
val new_dcsr = new DCSR().fromBits(wdata)
|
||||
reg_dcsr.halt := new_dcsr.halt
|
||||
reg_dcsr.step := new_dcsr.step
|
||||
reg_dcsr.ebreakm := new_dcsr.ebreakm
|
||||
if (usingVM) reg_dcsr.ebreaks := new_dcsr.ebreaks
|
||||
if (usingUser) reg_dcsr.ebreaku := new_dcsr.ebreaku
|
||||
if (usingUser) reg_dcsr.prv := trimPrivilege(new_dcsr.prv)
|
||||
}
|
||||
when (decoded_addr(CSRs.dpc)) { reg_dpc := ~(~wdata | (coreInstBytes-1)) }
|
||||
when (decoded_addr(CSRs.dscratch)) { reg_dscratch := wdata }
|
||||
}
|
||||
if (usingVM) {
|
||||
when (decoded_addr(CSRs.sstatus)) {
|
||||
val new_sstatus = new MStatus().fromBits(wdata)
|
||||
reg_mstatus.sie := new_sstatus.sie
|
||||
reg_mstatus.spie := new_sstatus.spie
|
||||
reg_mstatus.spp := new_sstatus.spp
|
||||
reg_mstatus.pum := new_sstatus.pum
|
||||
reg_mstatus.fs := Fill(2, new_sstatus.fs.orR) // even without an FPU
|
||||
if (usingRoCC) reg_mstatus.xs := Fill(2, new_sstatus.xs.orR)
|
||||
}
|
||||
when (decoded_addr(CSRs.sip)) {
|
||||
val new_sip = new MIP().fromBits(wdata)
|
||||
reg_mip.ssip := new_sip.ssip
|
||||
}
|
||||
when (decoded_addr(CSRs.sie)) { reg_mie := (reg_mie & ~reg_mideleg) | (wdata & reg_mideleg) }
|
||||
when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata }
|
||||
when (decoded_addr(CSRs.sptbr)) { reg_sptbr.ppn := wdata(ppnBits-1,0) }
|
||||
when (decoded_addr(CSRs.sepc)) { reg_sepc := ~(~wdata | (coreInstBytes-1)) }
|
||||
when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata >> 2 << 2 }
|
||||
when (decoded_addr(CSRs.scause)) { reg_scause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ }
|
||||
when (decoded_addr(CSRs.sbadaddr)) { reg_sbadaddr := wdata(vaddrBitsExtended-1,0) }
|
||||
when (decoded_addr(CSRs.mideleg)) { reg_mideleg := wdata & delegable_interrupts }
|
||||
when (decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata & delegable_exceptions }
|
||||
}
|
||||
if (p(NBreakpoints) > 0) {
|
||||
val newTDR = new TDRSelect().fromBits(wdata)
|
||||
when (decoded_addr(CSRs.tdrselect)) { reg_tdrselect.tdrindex := newTDR.tdrindex }
|
||||
|
||||
when (reg_tdrselect.tdrmode || reg_debug) {
|
||||
when (decoded_addr(CSRs.tdrdata1)) {
|
||||
val newBPC = new BPControl().fromBits(wdata)
|
||||
reg_bp(reg_tdrselect.tdrindex).control := newBPC
|
||||
reg_bp(reg_tdrselect.tdrindex).control.bpmatch := newBPC.bpmatch & 2 /* exact/NAPOT only */
|
||||
}
|
||||
when (decoded_addr(CSRs.tdrdata2)) { reg_bp(reg_tdrselect.tdrindex).address := wdata }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
reg_mip := io.prci.interrupts
|
||||
reg_dcsr.debugint := io.prci.interrupts.debug
|
||||
reg_dcsr.hwbpcount := UInt(p(NBreakpoints))
|
||||
|
||||
reg_sptbr.asid := 0
|
||||
reg_tdrselect.reserved := 0
|
||||
reg_tdrselect.tdrmode := true // TODO support D-mode breakpoint theft
|
||||
if (reg_bp.isEmpty) reg_tdrselect.tdrindex := 0
|
||||
for (bpc <- reg_bp map {_.control}) {
|
||||
bpc.tdrtype := bpc.tdrType
|
||||
bpc.bpamaskmax := bpc.bpaMaskMax
|
||||
bpc.reserved := 0
|
||||
bpc.bpaction := 0
|
||||
bpc.h := false
|
||||
if (!usingVM) bpc.s := false
|
||||
if (!usingUser) bpc.u := false
|
||||
if (!usingVM && !usingUser) bpc.m := true
|
||||
when (reset) {
|
||||
bpc.r := false
|
||||
bpc.w := false
|
||||
bpc.x := false
|
||||
}
|
||||
}
|
||||
for (bp <- reg_bp drop p(NBreakpoints))
|
||||
bp := new BP().fromBits(0)
|
||||
|
||||
def legalizePrivilege(priv: UInt): UInt =
|
||||
if (usingVM) Mux(priv === PRV.H, PRV.U, priv)
|
||||
else if (usingUser) Fill(2, priv(0))
|
||||
else PRV.M
|
||||
|
||||
def trimPrivilege(priv: UInt): UInt =
|
||||
if (usingVM) priv
|
||||
else legalizePrivilege(priv)
|
||||
}
|
||||
444
src/main/scala/rocket/dcache.scala
Normal file
444
src/main/scala/rocket/dcache.scala
Normal file
@@ -0,0 +1,444 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import uncore.agents._
|
||||
import uncore.coherence._
|
||||
import uncore.util._
|
||||
import uncore.constants._
|
||||
import cde.{Parameters, Field}
|
||||
import Util._
|
||||
|
||||
/** One request into the DCache data array: a row-granular read or a
  * byte-masked write.  NOTE(review): field order defines this Bundle's bit
  * layout — do not reorder.
  */
class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
  val addr = Bits(width = untagBits)   // low (untagged) address bits — presumably set index + block offset; TODO confirm
  val write = Bool()                   // true: commit wdata under wmask; false: read
  val wdata = Bits(width = rowBits)    // one full row of write data
  val wmask = Bits(width = rowBytes)   // per-byte write enables for wdata
  val way_en = Bits(width = nWays)     // one-hot way select (ignored when nWays == 1, per DCacheDataArray)
}
|
||||
|
||||
/** Data storage for the DCache: one SeqMem per way, each holding
  * nSets*refillCycles rows of rowBytes bytes.  A valid request either
  * performs a byte-masked write or a synchronous read; a way participates
  * only when its way_en bit is set (or unconditionally when there is a
  * single way).  Reads appear on io.resp(way) the following cycle, as is
  * inherent to SeqMem.
  */
class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  val io = new Bundle {
    val req = Valid(new DCacheDataReq).flip
    val resp = Vec(nWays, Bits(OUTPUT, rowBits))
  }

  // Row index within a way: drop the byte-within-row offset bits.
  val rowAddr = io.req.bits.addr >> rowOffBits

  for (way <- 0 until nWays) {
    val mem = SeqMem(nSets*refillCycles, Vec(rowBytes, Bits(width=8)))
    // Direct-mapped (single-way) arrays ignore way_en entirely.
    val wayActive = io.req.valid && (Bool(nWays == 1) || io.req.bits.way_en(way))

    when (wayActive && io.req.bits.write) {
      // Slice the row into bytes so each wmask bit gates one byte lane.
      val wbytes = Vec.tabulate(rowBytes)(b => io.req.bits.wdata(8*b+7, 8*b))
      mem.write(rowAddr, wbytes, io.req.bits.wmask.toBools)
    }

    // Read is enabled only for non-write requests targeting this way.
    io.resp(way) := mem.read(rowAddr, wayActive && !io.req.bits.write).asUInt
  }
}
|
||||
|
||||
class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
val io = new Bundle {
|
||||
val cpu = (new HellaCacheIO).flip
|
||||
val ptw = new TLBPTWIO()
|
||||
val mem = new ClientTileLinkIO
|
||||
}
|
||||
|
||||
val fq = Module(new FinishQueue(1))
|
||||
|
||||
require(rowBits == encRowBits) // no ECC
|
||||
require(refillCyclesPerBeat == 1)
|
||||
require(rowBits >= coreDataBits)
|
||||
|
||||
// tags
|
||||
val replacer = p(Replacer)()
|
||||
def onReset = L1Metadata(UInt(0), ClientMetadata.onReset)
|
||||
val meta = Module(new MetadataArray(onReset _))
|
||||
val metaReadArb = Module(new Arbiter(new MetaReadReq, 3))
|
||||
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 3))
|
||||
meta.io.read <> metaReadArb.io.out
|
||||
meta.io.write <> metaWriteArb.io.out
|
||||
|
||||
// data
|
||||
val data = Module(new DCacheDataArray)
|
||||
val dataArb = Module(new Arbiter(new DCacheDataReq, 4))
|
||||
data.io.req <> dataArb.io.out
|
||||
dataArb.io.out.ready := true
|
||||
|
||||
val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
|
||||
val s1_probe = Reg(next=io.mem.probe.fire(), init=Bool(false))
|
||||
val probe_bits = RegEnable(io.mem.probe.bits, io.mem.probe.fire())
|
||||
val s1_nack = Wire(init=Bool(false))
|
||||
val s1_valid_masked = s1_valid && !io.cpu.s1_kill && !io.cpu.xcpt.asUInt.orR
|
||||
val s1_valid_not_nacked = s1_valid_masked && !s1_nack
|
||||
val s1_req = Reg(io.cpu.req.bits)
|
||||
when (metaReadArb.io.out.valid) {
|
||||
s1_req := io.cpu.req.bits
|
||||
s1_req.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaReadArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0))
|
||||
}
|
||||
val s1_read = isRead(s1_req.cmd)
|
||||
val s1_write = isWrite(s1_req.cmd)
|
||||
val s1_readwrite = s1_read || s1_write
|
||||
val s1_flush_valid = Reg(Bool())
|
||||
|
||||
val s_ready :: s_grant_wait :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 8)
|
||||
val grant_wait = Reg(init=Bool(false))
|
||||
val release_ack_wait = Reg(init=Bool(false))
|
||||
val release_state = Reg(init=s_ready)
|
||||
val pstore1_valid = Wire(Bool())
|
||||
val pstore2_valid = Reg(Bool())
|
||||
val inWriteback = release_state.isOneOf(s_voluntary_writeback, s_probe_rep_dirty)
|
||||
val releaseWay = Wire(UInt())
|
||||
io.cpu.req.ready := (release_state === s_ready) && !grant_wait && !s1_nack
|
||||
|
||||
// hit initiation path
|
||||
dataArb.io.in(3).valid := io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)
|
||||
dataArb.io.in(3).bits.write := false
|
||||
dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr
|
||||
dataArb.io.in(3).bits.way_en := ~UInt(0, nWays)
|
||||
when (!dataArb.io.in(3).ready && isRead(io.cpu.req.bits.cmd)) { io.cpu.req.ready := false }
|
||||
metaReadArb.io.in(2).valid := io.cpu.req.valid
|
||||
metaReadArb.io.in(2).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB)
|
||||
metaReadArb.io.in(2).bits.way_en := ~UInt(0, nWays)
|
||||
when (!metaReadArb.io.in(2).ready) { io.cpu.req.ready := false }
|
||||
|
||||
// address translation
|
||||
val tlb = Module(new TLB)
|
||||
io.ptw <> tlb.io.ptw
|
||||
tlb.io.req.valid := s1_valid_masked && s1_readwrite
|
||||
tlb.io.req.bits.passthrough := s1_req.phys
|
||||
tlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits
|
||||
tlb.io.req.bits.instruction := false
|
||||
tlb.io.req.bits.store := s1_write
|
||||
when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false }
|
||||
when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true }
|
||||
|
||||
val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
|
||||
val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits))
|
||||
val s1_hit_way = meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt
|
||||
val s1_hit_state = ClientMetadata.onReset.fromBits(
|
||||
meta.io.resp.map(r => Mux(r.tag === s1_tag, r.coh.asUInt, UInt(0)))
|
||||
.reduce (_|_))
|
||||
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
|
||||
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
|
||||
val s1_victim_way = Wire(init = replacer.way)
|
||||
|
||||
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
|
||||
val s2_probe = Reg(next=s1_probe, init=Bool(false))
|
||||
val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready
|
||||
val s2_valid_masked = s2_valid && Reg(next = !s1_nack)
|
||||
val s2_req = Reg(io.cpu.req.bits)
|
||||
val s2_uncached = Reg(Bool())
|
||||
when (s1_valid_not_nacked || s1_flush_valid) {
|
||||
s2_req := s1_req
|
||||
s2_req.addr := s1_paddr
|
||||
s2_uncached := !tlb.io.resp.cacheable
|
||||
}
|
||||
val s2_read = isRead(s2_req.cmd)
|
||||
val s2_write = isWrite(s2_req.cmd)
|
||||
val s2_readwrite = s2_read || s2_write
|
||||
val s2_flush_valid = RegNext(s1_flush_valid)
|
||||
val s2_data = RegEnable(s1_data, s1_valid || inWriteback)
|
||||
val s2_probe_way = RegEnable(s1_hit_way, s1_probe)
|
||||
val s2_probe_state = RegEnable(s1_hit_state, s1_probe)
|
||||
val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked)
|
||||
val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked)
|
||||
val s2_hit = s2_hit_state.isHit(s2_req.cmd)
|
||||
val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit
|
||||
val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait
|
||||
val s2_valid_cached_miss = s2_valid_miss && !s2_uncached
|
||||
val s2_victimize = s2_valid_cached_miss || s2_flush_valid
|
||||
val s2_valid_uncached = s2_valid_miss && s2_uncached
|
||||
val s2_victim_way = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)))
|
||||
val s2_victim_tag = RegEnable(meta.io.resp(s1_victim_way).tag, s1_valid_not_nacked || s1_flush_valid)
|
||||
val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(meta.io.resp(s1_victim_way).coh, s1_valid_not_nacked || s1_flush_valid))
|
||||
val s2_victim_valid = s2_victim_state.isValid()
|
||||
val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback()
|
||||
val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd)
|
||||
val s2_update_meta = s2_hit_state =/= s2_new_hit_state
|
||||
io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && io.mem.acquire.ready)
|
||||
when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true }
|
||||
|
||||
// exceptions
|
||||
val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned
|
||||
io.cpu.xcpt.ma.ld := s1_read && misaligned
|
||||
io.cpu.xcpt.ma.st := s1_write && misaligned
|
||||
io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld
|
||||
io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st
|
||||
|
||||
// load reservations
|
||||
val s2_lr = Bool(usingAtomics) && s2_req.cmd === M_XLR
|
||||
val s2_sc = Bool(usingAtomics) && s2_req.cmd === M_XSC
|
||||
val lrscCount = Reg(init=UInt(0))
|
||||
val lrscValid = lrscCount > 0
|
||||
val lrscAddr = Reg(UInt())
|
||||
val s2_sc_fail = s2_sc && !(lrscValid && lrscAddr === (s2_req.addr >> blockOffBits))
|
||||
when (s2_valid_hit && s2_lr) {
|
||||
lrscCount := lrscCycles - 1
|
||||
lrscAddr := s2_req.addr >> blockOffBits
|
||||
}
|
||||
when (lrscValid) { lrscCount := lrscCount - 1 }
|
||||
when ((s2_valid_hit && s2_sc) || io.cpu.invalidate_lr) { lrscCount := 0 }
|
||||
|
||||
// pending store buffer
|
||||
val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_addr = RegEnable(s1_paddr, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_data = RegEnable(io.cpu.s1_data, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_storegen = new StoreGen(pstore1_typ, pstore1_addr, pstore1_data, wordBytes)
|
||||
val pstore1_storegen_data = Wire(init = pstore1_storegen.data)
|
||||
val pstore1_amo = Bool(usingAtomics) && isRead(pstore1_cmd)
|
||||
val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_amo)
|
||||
val pstore_drain_opportunistic = !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd))
|
||||
val pstore_drain_on_miss = releaseInFlight || io.cpu.s2_nack
|
||||
val pstore_drain =
|
||||
Bool(usingAtomics) && pstore_drain_structural ||
|
||||
(((pstore1_valid && !pstore1_amo) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss))
|
||||
pstore1_valid := {
|
||||
val s2_store_valid = s2_valid_hit && s2_write && !s2_sc_fail
|
||||
val pstore1_held = Reg(Bool())
|
||||
assert(!s2_store_valid || !pstore1_held)
|
||||
pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain
|
||||
s2_store_valid || pstore1_held
|
||||
}
|
||||
val advance_pstore1 = pstore1_valid && (pstore2_valid === pstore_drain)
|
||||
pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1
|
||||
val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1)
|
||||
val pstore2_way = RegEnable(pstore1_way, advance_pstore1)
|
||||
val pstore2_storegen_data = RegEnable(pstore1_storegen_data, advance_pstore1)
|
||||
val pstore2_storegen_mask = RegEnable(pstore1_storegen.mask, advance_pstore1)
|
||||
dataArb.io.in(0).valid := pstore_drain
|
||||
dataArb.io.in(0).bits.write := true
|
||||
dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
|
||||
dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way)
|
||||
dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data))
|
||||
val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits
|
||||
dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_storegen.mask) << pstore_mask_shift
|
||||
|
||||
// store->load RAW hazard detection
|
||||
val s1_idx = s1_req.addr(idxMSB, wordOffBits)
|
||||
val s1_raw_hazard = s1_read &&
|
||||
((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx) ||
|
||||
(pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx))
|
||||
when (s1_valid && s1_raw_hazard) { s1_nack := true }
|
||||
|
||||
metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty)
|
||||
metaWriteArb.io.in(0).bits.way_en := s2_victim_way
|
||||
metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB)
|
||||
metaWriteArb.io.in(0).bits.data.coh := Mux(s2_hit, s2_new_hit_state, ClientMetadata.onReset)
|
||||
metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
|
||||
|
||||
// acquire
|
||||
val cachedGetMessage = s2_hit_state.makeAcquire(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
|
||||
op_code = s2_req.cmd)
|
||||
val uncachedGetMessage = Get(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
|
||||
addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
|
||||
addr_byte = s2_req.addr(beatOffBits-1, 0),
|
||||
operand_size = s2_req.typ,
|
||||
alloc = Bool(false))
|
||||
val uncachedPutOffset = s2_req.addr.extract(beatOffBits-1, wordOffBits)
|
||||
val uncachedPutMessage = Put(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
|
||||
addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
|
||||
data = Fill(beatWords, pstore1_storegen.data),
|
||||
wmask = Some(pstore1_storegen.mask << (uncachedPutOffset << wordOffBits)),
|
||||
alloc = Bool(false))
|
||||
val uncachedPutAtomicMessage = PutAtomic(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = s2_req.addr(paddrBits-1, blockOffBits),
|
||||
addr_beat = s2_req.addr(blockOffBits-1, beatOffBits),
|
||||
addr_byte = s2_req.addr(beatOffBits-1, 0),
|
||||
atomic_opcode = s2_req.cmd,
|
||||
operand_size = s2_req.typ,
|
||||
data = Fill(beatWords, pstore1_storegen.data))
|
||||
io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || s2_valid_uncached) && fq.io.enq.ready
|
||||
io.mem.acquire.bits := cachedGetMessage
|
||||
when (s2_uncached) {
|
||||
assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access")
|
||||
io.mem.acquire.bits := uncachedGetMessage
|
||||
when (s2_write) {
|
||||
io.mem.acquire.bits := uncachedPutMessage
|
||||
when (pstore1_amo) {
|
||||
io.mem.acquire.bits := uncachedPutAtomicMessage
|
||||
}
|
||||
}
|
||||
}
|
||||
when (io.mem.acquire.fire()) { grant_wait := true }
|
||||
|
||||
// grant
|
||||
val grantIsRefill = io.mem.grant.bits.hasMultibeatData()
|
||||
val grantIsVoluntary = io.mem.grant.bits.isVoluntary()
|
||||
val grantIsUncached = !grantIsRefill && !grantIsVoluntary
|
||||
when (io.mem.grant.valid) {
|
||||
assert(grant_wait || grantIsVoluntary && release_ack_wait, "unexpected grant")
|
||||
when (grantIsUncached) { s2_data := io.mem.grant.bits.data }
|
||||
when (grantIsVoluntary) { release_ack_wait := false }
|
||||
}
|
||||
val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles)
|
||||
val grantDone = refillDone || grantIsUncached
|
||||
when (io.mem.grant.fire() && grantDone) { grant_wait := false }
|
||||
|
||||
// data refill
|
||||
dataArb.io.in(1).valid := grantIsRefill && io.mem.grant.valid
|
||||
io.mem.grant.ready := true
|
||||
assert(dataArb.io.in(1).ready || !dataArb.io.in(1).valid)
|
||||
dataArb.io.in(1).bits.write := true
|
||||
dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits
|
||||
dataArb.io.in(1).bits.way_en := s2_victim_way
|
||||
dataArb.io.in(1).bits.wdata := io.mem.grant.bits.data
|
||||
dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes)
|
||||
// tag updates on refill
|
||||
metaWriteArb.io.in(1).valid := refillDone
|
||||
assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready)
|
||||
metaWriteArb.io.in(1).bits.way_en := s2_victim_way
|
||||
metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB)
|
||||
metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd)
|
||||
metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits)
|
||||
|
||||
// finish
|
||||
fq.io.enq.valid := io.mem.grant.fire() && io.mem.grant.bits.requiresAck() && (!grantIsRefill || refillDone)
|
||||
fq.io.enq.bits := io.mem.grant.bits.makeFinish()
|
||||
io.mem.finish <> fq.io.deq
|
||||
when (fq.io.enq.valid) { assert(fq.io.enq.ready) }
|
||||
when (refillDone) { replacer.miss }
|
||||
|
||||
// probe
|
||||
val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr)
|
||||
metaReadArb.io.in(1).valid := io.mem.probe.valid && !block_probe
|
||||
io.mem.probe.ready := metaReadArb.io.in(1).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit)
|
||||
metaReadArb.io.in(1).bits.idx := io.mem.probe.bits.addr_block
|
||||
metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays)
|
||||
|
||||
// release
|
||||
val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles)
|
||||
val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback)
|
||||
val releaseRejected = io.mem.release.valid && !io.mem.release.ready
|
||||
val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire())
|
||||
val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected)
|
||||
val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid))
|
||||
io.mem.release.valid := s2_release_data_valid
|
||||
io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits)
|
||||
val voluntaryReleaseMessage = s2_victim_state.makeVoluntaryWriteback(UInt(0), UInt(0))
|
||||
val voluntaryNewCoh = s2_victim_state.onCacheControl(M_FLUSH)
|
||||
val probeResponseMessage = s2_probe_state.makeRelease(probe_bits)
|
||||
val probeNewCoh = s2_probe_state.onProbe(probe_bits)
|
||||
val newCoh = Wire(init = probeNewCoh)
|
||||
releaseWay := s2_probe_way
|
||||
when (s2_victimize && s2_victim_dirty) {
|
||||
assert(!s2_hit_state.isValid())
|
||||
release_state := s_voluntary_writeback
|
||||
probe_bits.addr_block := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB))
|
||||
}
|
||||
when (s2_probe) {
|
||||
when (s2_probe_state.requiresVoluntaryWriteback()) { release_state := s_probe_rep_dirty }
|
||||
.elsewhen (s2_probe_state.isValid()) { release_state := s_probe_rep_clean }
|
||||
.otherwise {
|
||||
io.mem.release.valid := true
|
||||
release_state := s_probe_rep_miss
|
||||
}
|
||||
}
|
||||
when (releaseDone) { release_state := s_ready }
|
||||
when (release_state.isOneOf(s_probe_rep_miss, s_probe_rep_clean)) {
|
||||
io.mem.release.valid := true
|
||||
}
|
||||
when (release_state.isOneOf(s_probe_rep_clean, s_probe_rep_dirty)) {
|
||||
io.mem.release.bits := probeResponseMessage
|
||||
when (releaseDone) { release_state := s_probe_write_meta }
|
||||
}
|
||||
when (release_state.isOneOf(s_voluntary_writeback, s_voluntary_write_meta)) {
|
||||
io.mem.release.bits := voluntaryReleaseMessage
|
||||
newCoh := voluntaryNewCoh
|
||||
releaseWay := s2_victim_way
|
||||
when (releaseDone) {
|
||||
release_state := s_voluntary_write_meta
|
||||
release_ack_wait := true
|
||||
}
|
||||
}
|
||||
when (s2_probe && !io.mem.release.fire()) { s1_nack := true }
|
||||
io.mem.release.bits.addr_block := probe_bits.addr_block
|
||||
io.mem.release.bits.addr_beat := writebackCount
|
||||
io.mem.release.bits.data := s2_data
|
||||
|
||||
dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles
|
||||
dataArb.io.in(2).bits.write := false
|
||||
dataArb.io.in(2).bits.addr := Cat(io.mem.release.bits.addr_block, releaseDataBeat(log2Up(refillCycles)-1,0)) << rowOffBits
|
||||
dataArb.io.in(2).bits.way_en := ~UInt(0, nWays)
|
||||
|
||||
metaWriteArb.io.in(2).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta)
|
||||
metaWriteArb.io.in(2).bits.way_en := releaseWay
|
||||
metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB)
|
||||
metaWriteArb.io.in(2).bits.data.coh := newCoh
|
||||
metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits)
|
||||
when (metaWriteArb.io.in(2).fire()) { release_state := s_ready }
|
||||
|
||||
// cached response
|
||||
io.cpu.resp.valid := s2_valid_hit
|
||||
io.cpu.resp.bits := s2_req
|
||||
io.cpu.resp.bits.has_data := s2_read
|
||||
io.cpu.resp.bits.replay := false
|
||||
io.cpu.ordered := !(s1_valid || s2_valid || grant_wait)
|
||||
|
||||
// uncached response
|
||||
io.cpu.replay_next := io.mem.grant.valid && grantIsUncached
|
||||
val doUncachedResp = Reg(next = io.cpu.replay_next)
|
||||
when (doUncachedResp) {
|
||||
assert(!s2_valid_hit)
|
||||
io.cpu.resp.valid := true
|
||||
io.cpu.resp.bits.replay := true
|
||||
}
|
||||
|
||||
// load data subword mux/sign extension
|
||||
val s2_word_idx = s2_req.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes))
|
||||
val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits)))
|
||||
val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word, s2_sc, wordBytes)
|
||||
io.cpu.resp.bits.data := loadgen.data | s2_sc_fail
|
||||
io.cpu.resp.bits.data_word_bypass := loadgen.wordData
|
||||
io.cpu.resp.bits.store_data := pstore1_data
|
||||
|
||||
// AMOs
|
||||
if (usingAtomics) {
|
||||
val amoalu = Module(new AMOALU(xLen))
|
||||
amoalu.io.addr := pstore1_addr
|
||||
amoalu.io.cmd := pstore1_cmd
|
||||
amoalu.io.typ := pstore1_typ
|
||||
amoalu.io.lhs := s2_data_word
|
||||
amoalu.io.rhs := pstore1_data
|
||||
pstore1_storegen_data := amoalu.io.out
|
||||
} else {
|
||||
assert(!(s1_valid_masked && s1_read && s1_write), "unsupported D$ operation")
|
||||
}
|
||||
|
||||
// flushes
|
||||
val flushed = Reg(init=Bool(true))
|
||||
val flushing = Reg(init=Bool(false))
|
||||
val flushCounter = Counter(nSets * nWays)
|
||||
when (io.mem.acquire.fire()) { flushed := false }
|
||||
when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
|
||||
io.cpu.s2_nack := !flushed
|
||||
when (!flushed) {
|
||||
flushing := !release_ack_wait
|
||||
}
|
||||
}
|
||||
s1_flush_valid := metaReadArb.io.in(0).fire() && !s1_flush_valid && !s2_flush_valid && release_state === s_ready && !release_ack_wait
|
||||
metaReadArb.io.in(0).valid := flushing
|
||||
metaReadArb.io.in(0).bits.idx := flushCounter.value
|
||||
metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays)
|
||||
when (flushing) {
|
||||
s1_victim_way := flushCounter.value >> log2Up(nSets)
|
||||
when (s2_flush_valid) {
|
||||
when (flushCounter.inc()) {
|
||||
flushed := true
|
||||
}
|
||||
}
|
||||
when (flushed && release_state === s_ready && !release_ack_wait) {
|
||||
flushing := false
|
||||
}
|
||||
}
|
||||
}
|
||||
203
src/main/scala/rocket/decode.scala
Normal file
203
src/main/scala/rocket/decode.scala
Normal file
@@ -0,0 +1,203 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
|
||||
/** Builds minimized decode logic from (key pattern -> value pattern) truth
  * tables, using the Quine-McCluskey minimizers below (Simplify /
  * SimplifyDC).  Single-term match expressions are memoized per address
  * signal so equivalent comparisons share hardware.
  */
object DecodeLogic
{
  /** Convert a BitPat literal into a Term (value plus don't-care mask). */
  def term(lit: BitPat) =
    new Term(lit.value, BigInt(2).pow(lit.getWidth)-(lit.mask+1))
  /** OR together the match conditions of `terms` against `addr`, memoizing
    * each per-term comparison in `cache`. */
  def logic(addr: UInt, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bool], terms: Seq[Term]) = {
    terms.map { t =>
      cache.getOrElseUpdate(t, (if (t.mask == 0) addr else addr & Bits(BigInt(2).pow(addrWidth)-(t.mask+1), addrWidth)) === Bits(t.value, addrWidth))
    }.foldLeft(Bool(false))(_||_)
  }
  /** Decode `addr` to a single output; `default` supplies the value (or
    * don't-care bits) for inputs not covered by `mapping`. */
  def apply(addr: UInt, default: BitPat, mapping: Iterable[(BitPat, BitPat)]): UInt = {
    val cache = caches.getOrElseUpdate(addr, collection.mutable.Map[Term,Bool]())
    val dterm = term(default)
    val (keys, values) = mapping.unzip
    val addrWidth = keys.map(_.getWidth).max
    val terms = keys.toList.map(k => term(k))
    val termvalues = terms zip values.toList.map(term(_))

    // No two keys may match the same input.
    for (t <- keys.zip(terms).tails; if !t.isEmpty)
      for (u <- t.tail)
        assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap")

    // Minimize each output bit independently, then concatenate.
    Cat((0 until default.getWidth.max(values.map(_.getWidth).max)).map({ case (i: Int) =>
      // Keys whose value bit i is a definite 1 / definite 0 / don't-care.
      val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1)
      val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1)
      val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1)

      if (((dterm.mask >> i) & 1) != 0) {
        // Default bit is don't-care: minimize with implicit don't-cares.
        logic(addr, addrWidth, cache, SimplifyDC(mint, maxt, addrWidth))
      } else {
        // Default bit is fixed: cover only the terms that differ from it,
        // treating explicit don't-cares as free; invert if the default is 1.
        val defbit = (dterm.value.toInt >> i) & 1
        val t = if (defbit == 0) mint else maxt
        val bit = logic(addr, addrWidth, cache, Simplify(t, dc, addrWidth))
        if (defbit == 0) bit else ~bit
      }
    }).reverse)
  }
  /** Decode to several outputs at once: one (default, table column) per
    * output signal. */
  def apply(addr: UInt, default: Seq[BitPat], mappingIn: Iterable[(BitPat, Seq[BitPat])]): Seq[UInt] = {
    // Transpose the row-per-instruction table into a column per signal.
    val mapping = collection.mutable.ArrayBuffer.fill(default.size)(collection.mutable.ArrayBuffer[(BitPat, BitPat)]())
    for ((key, values) <- mappingIn)
      for ((value, i) <- values zipWithIndex)
        mapping(i) += key -> value
    for ((thisDefault, thisMapping) <- default zip mapping)
      yield apply(addr, thisDefault, thisMapping)
  }
  def apply(addr: UInt, default: Seq[BitPat], mappingIn: List[(UInt, Seq[BitPat])]): Seq[UInt] =
    apply(addr, default, mappingIn.map(m => (BitPat(m._1), m._2)).asInstanceOf[Iterable[(BitPat, Seq[BitPat])]])
  /** One-bit decode: true for `trues`, false for `falses`, else don't-care. */
  def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool =
    apply(addr, BitPat.dontCare(1), trues.map(BitPat(_) -> BitPat("b1")) ++ falses.map(BitPat(_) -> BitPat("b0"))).toBool
  // Per-address memoization of single-term comparisons, shared across calls.
  private val caches = collection.mutable.Map[UInt,collection.mutable.Map[Term,Bool]]()
}
|
||||
|
||||
/** A product term (cube) for logic minimization: `value` gives the fixed
  * bit values, `mask` marks don't-care bit positions.  Mutable `prime`
  * tracks whether the term survived merging as a prime implicant.
  */
class Term(val value: BigInt, val mask: BigInt = 0)
{
  // Cleared once this term has been merged into a larger implicant.
  var prime = true

  /** True if every input matched by `x` is also matched by this term. */
  def covers(x: Term) = {
    val fixedBitsDiffer = (value ^ x.value) &~ mask
    val extraDontCares  = x.mask &~ mask
    (fixedBitsDiffer | extraDontCares) == 0
  }

  /** True if the two terms match at least one common input. */
  def intersects(x: Term) = ((value ^ x.value) &~ mask &~ x.mask) == 0

  override def equals(that: Any) = that match {
    case t: Term => t.value == value && t.mask == mask
    case _       => false
  }

  override def hashCode = value.toInt

  /** Total order: by value, then by mask. */
  def < (that: Term) =
    if (value == that.value) mask < that.mask else value < that.value

  /** True if `x` has the same mask and differs from this term in exactly
    * one bit, with this term's value strictly greater. */
  def similar(x: Term) = {
    val delta = value - x.value
    mask == x.mask && value > x.value && (delta & (delta - 1)) == 0
  }

  /** Combine with a `similar` term, widening the don't-care mask by the
    * differing bit.  Both inputs are marked non-prime as a side effect. */
  def merge(x: Term) = {
    prime = false
    x.prime = false
    val differingBit = value - x.value
    new Term(value &~ differingBit, mask | differingBit)
  }

  override def toString = {
    val primeSuffix = if (prime) "p" else ""
    s"${value.toString(16)}-${mask.toString(16)}$primeSuffix"
  }
}
|
||||
|
||||
/** Quine-McCluskey two-level logic minimization: find a minimal cover of
  * `minterms`, optionally using an explicit `dontcares` set to enlarge
  * implicants. */
object Simplify
{
  /** Repeatedly merge `similar` implicants; anything never merged into a
    * larger implicant stays marked prime and is returned, sorted. */
  def getPrimeImplicants(implicants: Seq[Term], bits: Int) = {
    var prime = List[Term]()
    implicants.foreach(_.prime = true)
    // Bucket by (mask popcount, value popcount): only same-mask-size terms
    // in adjacent value-popcount buckets can be merged.
    val cols = (0 to bits).map(b => implicants.filter(b == _.mask.bitCount))
    val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set(c.filter(b == _.value.bitCount):_*)))
    for (i <- 0 to bits) {
      for (j <- 0 until bits-i)
        table(i)(j).foreach(a => table(i+1)(j) ++= table(i)(j+1).filter(_.similar(a)).map(_.merge(a)))
      // Anything still prime after this round can never merge again.
      for (r <- table(i))
        for (p <- r; if p.prime)
          prime = p :: prime
    }
    prime.sortWith(_<_)
  }
  /** Split primes into (essential, nonessential) and return the minterms
    * the essential set leaves uncovered.  Dominated primes (those whose
    * cover is a strict subset of another prime's) are dropped first by
    * restarting recursively. */
  def getEssentialPrimeImplicants(prime: Seq[Term], minterms: Seq[Term]): (Seq[Term],Seq[Term],Seq[Term]) = {
    for (i <- 0 until prime.size) {
      val icover = minterms.filter(prime(i) covers _)
      for (j <- 0 until prime.size) {
        val jcover = minterms.filter(prime(j) covers _)
        if (icover.size > jcover.size && jcover.forall(prime(i) covers _))
          return getEssentialPrimeImplicants(prime.filter(_ != prime(j)), minterms)
      }
    }

    // A prime is essential if it is the sole cover of some minterm.
    val essentiallyCovered = minterms.filter(t => prime.count(_ covers t) == 1)
    val essential = prime.filter(p => essentiallyCovered.exists(p covers _))
    val nonessential = prime.filterNot(essential contains _)
    val uncovered = minterms.filterNot(t => essential.exists(_ covers t))
    if (essential.isEmpty || uncovered.isEmpty)
      (essential, nonessential, uncovered)
    else {
      // Removing essentials may expose new essentials; recurse on the rest.
      val (a, b, c) = getEssentialPrimeImplicants(nonessential, uncovered)
      (essential ++ a, b, c)
    }
  }
  /** Cost metric: total number of literals (unmasked bits) in the cover. */
  def getCost(cover: Seq[Term], bits: Int) = cover.map(bits - _.mask.bitCount).sum
  /** Compare covers: fewer literals wins; ties broken lexicographically so
    * the choice is deterministic. */
  def cheaper(a: List[Term], b: List[Term], bits: Int) = {
    val ca = getCost(a, bits)
    val cb = getCost(b, bits)
    def listLess(a: List[Term], b: List[Term]): Boolean = !b.isEmpty && (a.isEmpty || a.head < b.head || a.head == b.head && listLess(a.tail, b.tail))
    ca < cb || ca == cb && listLess(a.sortWith(_<_), b.sortWith(_<_))
  }
  /** Exhaustively pick the cheapest cover of `minterms` from `implicants`.
    * Exponential in the worst case, but decode tables are small. */
  def getCover(implicants: Seq[Term], minterms: Seq[Term], bits: Int) = {
    if (minterms.nonEmpty) {
      val cover = minterms.map(m => implicants.filter(_.covers(m)).map(i => collection.mutable.Set(i)))
      val all = cover.reduceLeft((c0, c1) => c0.map(a => c1.map(_ ++ a)).reduceLeft(_++_))
      all.map(_.toList).reduceLeft((a, b) => if (cheaper(a, b, bits)) a else b)
    } else
      Seq[Term]()
  }
  /** Render a cover as a sum of cube strings ("x" = don't-care); debug aid. */
  def stringify(s: Seq[Term], bits: Int) = s.map(t => (0 until bits).map(i => if ((t.mask & (1 << i)) != 0) "x" else ((t.value >> i) & 1).toString).reduceLeft(_+_).reverse).reduceLeft(_+" + "+_)

  /** Minimize: prime implicants, then essential primes, then a cheapest
    * cover of what remains. */
  def apply(minterms: Seq[Term], dontcares: Seq[Term], bits: Int) = {
    val prime = getPrimeImplicants(minterms ++ dontcares, bits)
    minterms.foreach(t => assert(prime.exists(_.covers(t))))
    val (eprime, prime2, uncovered) = getEssentialPrimeImplicants(prime, minterms)
    val cover = eprime ++ getCover(prime2, uncovered, bits)
    minterms.foreach(t => assert(cover.exists(_.covers(t)))) // sanity check
    cover
  }
}
|
||||
|
||||
/** Quine-McCluskey variant where don't-cares are implicit: any input that
  * is neither a minterm nor a maxterm may take either value, so implicants
  * may be grown into that space as long as they hit no maxterm. */
object SimplifyDC
{
  /** Find a one-bit neighbor of `term` (bit set when `above`, cleared
    * otherwise) that intersects no maxterm — i.e. an implicit don't-care
    * usable for merging.  Returns null when none exists. */
  def getImplicitDC(maxterms: Seq[Term], term: Term, bits: Int, above: Boolean): Term = {
    for (i <- 0 until bits) {
      var t: Term = null
      if (above && ((term.value | term.mask) & (BigInt(1) << i)) == 0)
        t = new Term(term.value | (BigInt(1) << i), term.mask)
      else if (!above && (term.value & (BigInt(1) << i)) != 0)
        t = new Term(term.value & ~(BigInt(1) << i), term.mask)
      if (t != null && !maxterms.exists(_.intersects(t)))
        return t
    }
    null
  }
  /** As Simplify.getPrimeImplicants, but additionally merges surviving
    * candidates with implicit don't-care neighbors each round. */
  def getPrimeImplicants(minterms: Seq[Term], maxterms: Seq[Term], bits: Int) = {
    var prime = List[Term]()
    minterms.foreach(_.prime = true)
    // Work on copies so the caller's terms keep their prime flags intact.
    var mint = minterms.map(t => new Term(t.value, t.mask))
    val cols = (0 to bits).map(b => mint.filter(b == _.mask.bitCount))
    val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set(c.filter(b == _.value.bitCount):_*)))

    for (i <- 0 to bits) {
      // First merge explicit pairs, as in the standard algorithm.
      for (j <- 0 until bits-i) {
        table(i)(j).foreach(a => table(i+1)(j) ++= table(i)(j+1).filter(_ similar a).map(_ merge a))
      }
      for (j <- 0 until bits-i) {
        // Then grow unmerged terms upward with an implicit DC one bit above...
        for (a <- table(i)(j).filter(_.prime)) {
          val dc = getImplicitDC(maxterms, a, bits, true)
          if (dc != null)
            table(i+1)(j) += dc merge a
        }
        // ...and downward with an implicit DC one bit below.
        for (a <- table(i)(j+1).filter(_.prime)) {
          val dc = getImplicitDC(maxterms, a, bits, false)
          if (dc != null)
            table(i+1)(j) += a merge dc
        }
      }
      for (r <- table(i))
        for (p <- r; if p.prime)
          prime = p :: prime
    }
    prime.sortWith(_<_)
  }

  /** Check the cover hits every minterm and intersects no maxterm. */
  def verify(cover: Seq[Term], minterms: Seq[Term], maxterms: Seq[Term]) = {
    assert(minterms.forall(t => cover.exists(_ covers t)))
    assert(maxterms.forall(t => !cover.exists(_ intersects t)))
  }
  /** Minimize with implicit don't-cares, reusing Simplify's essential-prime
    * selection and covering steps, then verify the result. */
  def apply(minterms: Seq[Term], maxterms: Seq[Term], bits: Int) = {
    val prime = getPrimeImplicants(minterms, maxterms, bits)
    val (eprime, prime2, uncovered) = Simplify.getEssentialPrimeImplicants(prime, minterms)
    val cover = eprime ++ Simplify.getCover(prime2, uncovered, bits)
    verify(cover, minterms, maxterms)
    cover
  }
}
|
||||
96
src/main/scala/rocket/dpath_alu.scala
Normal file
96
src/main/scala/rocket/dpath_alu.scala
Normal file
@@ -0,0 +1,96 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import cde.{Parameters, Field}
|
||||
import Instructions._
|
||||
|
||||
/** ALU function-code encodings and the predicates used to decode them. */
object ALU
{
  val SZ_ALU_FN = 4
  val FN_X = BitPat("b????")
  val FN_ADD = UInt(0)
  val FN_SL = UInt(1)
  val FN_SEQ = UInt(2)
  val FN_SNE = UInt(3)
  val FN_XOR = UInt(4)
  val FN_SR = UInt(5)
  val FN_OR = UInt(6)
  val FN_AND = UInt(7)
  val FN_SUB = UInt(10)
  val FN_SRA = UInt(11)
  val FN_SLT = UInt(12)
  val FN_SGE = UInt(13)
  val FN_SLTU = UInt(14)
  val FN_SGEU = UInt(15)

  // Divide/remainder ops alias existing ALU codes; isMulFN compares only
  // fn(1,0), so these remain distinguishable by their low two bits.
  val FN_DIV = FN_XOR
  val FN_DIVU = FN_SR
  val FN_REM = FN_OR
  val FN_REMU = FN_AND

  // Multiply ops likewise alias existing ALU codes.
  val FN_MUL = FN_ADD
  val FN_MULH = FN_SL
  val FN_MULHSU = FN_SLT
  val FN_MULHU = FN_SLTU

  def isMulFN(fn: UInt, cmp: UInt) = fn(1,0) === cmp(1,0)
  // Bit 3 marks subtract-style ops (SUB, SRA, and all magnitude compares).
  def isSub(cmd: UInt) = cmd(3)
  def isCmp(cmd: UInt) = cmd === FN_SEQ || cmd === FN_SNE || cmd >= FN_SLT
  // For magnitude compares: bit 1 selects unsigned, bit 0 inverts the result
  // (SGE = inverted SLT, SNE = inverted SEQ).
  def cmpUnsigned(cmd: UInt) = cmd(1)
  def cmpInverted(cmd: UInt) = cmd(0)
  // Equality-style compares (SEQ/SNE) have bit 3 clear.
  def cmpEq(cmd: UInt) = !cmd(3)
}
|
||||
import ALU._
|
||||
|
||||
/** Single-cycle integer ALU.  `fn` is one of the ALU.FN_* codes; on RV64,
  * `dw` selects 32- vs 64-bit operation (32-bit results are sign-extended).
  * The raw adder result is exposed separately on `adder_out`. */
class ALU(implicit p: Parameters) extends CoreModule()(p) {
  val io = new Bundle {
    val dw = Bits(INPUT, SZ_DW)
    val fn = Bits(INPUT, SZ_ALU_FN)
    val in2 = UInt(INPUT, xLen)
    val in1 = UInt(INPUT, xLen)
    val out = UInt(OUTPUT, xLen)
    val adder_out = UInt(OUTPUT, xLen)
    val cmp_out = Bool(OUTPUT)
  }

  // ADD, SUB: subtraction via two's complement (invert in2, carry-in 1).
  val in2_inv = Mux(isSub(io.fn), ~io.in2, io.in2)
  val in1_xor_in2 = io.in1 ^ in2_inv
  io.adder_out := io.in1 + in2_inv + isSub(io.fn)

  // SLT, SLTU, SEQ, SNE (and inverted forms): equality reuses the adder's
  // XOR term; magnitude compare reuses the subtractor's sign bit, picking
  // the proper operand sign when the signs differ (unsigned vs signed).
  io.cmp_out := cmpInverted(io.fn) ^
    Mux(cmpEq(io.fn), in1_xor_in2 === UInt(0),
    Mux(io.in1(xLen-1) === io.in2(xLen-1), io.adder_out(xLen-1),
    Mux(cmpUnsigned(io.fn), io.in2(xLen-1), io.in1(xLen-1))))

  // SLL, SRL, SRA: one right shifter serves all shifts; left shifts
  // bit-reverse the input and the output.
  val (shamt, shin_r) =
    if (xLen == 32) (io.in2(4,0), io.in1)
    else {
      require(xLen == 64)
      // For 32-bit ops the upper half is filled so the right shift behaves
      // as a 32-bit shift (sign bits for SRA-style ops, zeros otherwise).
      val shin_hi_32 = Fill(32, isSub(io.fn) && io.in1(31))
      val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32)
      // Shift-amount bit 5 only participates in 64-bit mode.
      val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0))
      (shamt, Cat(shin_hi, io.in1(31,0)))
    }
  val shin = Mux(io.fn === FN_SR || io.fn === FN_SRA, shin_r, Reverse(shin_r))
  // Arithmetic shift: prepend the sign bit (only for SRA-style fns).
  val shout_r = (Cat(isSub(io.fn) & shin(xLen-1), shin).asSInt >> shamt)(xLen-1,0)
  val shout_l = Reverse(shout_r)
  val shout = Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, UInt(0)) |
              Mux(io.fn === FN_SL, shout_l, UInt(0))

  // AND, OR, XOR: XOR shares the adder's in1 ^ in2 term; OR is built as
  // (in1 ^ in2) | (in1 & in2).
  val logic = Mux(io.fn === FN_XOR || io.fn === FN_OR, in1_xor_in2, UInt(0)) |
              Mux(io.fn === FN_OR || io.fn === FN_AND, io.in1 & io.in2, UInt(0))
  // Compare, logic, and shift results are mutually exclusive; OR them.
  val shift_logic = (isCmp(io.fn) && io.cmp_out) | logic | shout
  val out = Mux(io.fn === FN_ADD || io.fn === FN_SUB, io.adder_out, shift_logic)

  io.out := out
  // 32-bit ops on RV64 sign-extend the low word into the full result.
  if (xLen > 32) {
    require(xLen == 64)
    when (io.dw === DW_32) { io.out := Cat(Fill(32, out(31)), out(31,0)) }
  }
}
|
||||
654
src/main/scala/rocket/fpu.scala
Normal file
654
src/main/scala/rocket/fpu.scala
Normal file
@@ -0,0 +1,654 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import Instructions._
|
||||
import Util._
|
||||
import FPConstants._
|
||||
import uncore.constants.MemoryOpConstants._
|
||||
import uncore.util._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
/** FPU configuration: whether a divide/sqrt unit is present, and the
  * pipeline latencies of the single- and double-precision FMA units. */
case class FPUConfig(
  divSqrt: Boolean = true,
  sfmaLatency: Int = 2,
  dfmaLatency: Int = 3
)
|
||||
|
||||
/** FP command encodings (5-bit BitPats, '?' bits are don't-cares so
  * related commands share patterns) plus shared field widths. */
object FPConstants
{
  val FCMD_ADD = BitPat("b0??00")
  val FCMD_SUB = BitPat("b0??01")
  val FCMD_MUL = BitPat("b0??10")
  val FCMD_MADD = BitPat("b1??00")
  val FCMD_MSUB = BitPat("b1??01")
  val FCMD_NMSUB = BitPat("b1??10")
  val FCMD_NMADD = BitPat("b1??11")
  val FCMD_DIV = BitPat("b?0011")
  val FCMD_SQRT = BitPat("b?1011")
  val FCMD_SGNJ = BitPat("b??1?0")
  val FCMD_MINMAX = BitPat("b?01?1")
  val FCMD_CVT_FF = BitPat("b??0??")
  val FCMD_CVT_IF = BitPat("b?10??")
  val FCMD_CMP = BitPat("b?01??")
  val FCMD_MV_XF = BitPat("b?11??")
  val FCMD_CVT_FI = BitPat("b??0??")
  val FCMD_MV_FX = BitPat("b??1??")
  val FCMD_X = BitPat("b?????")
  val FCMD_WIDTH = 5

  // Rounding-mode and exception-flag field widths.
  val RM_SZ = 3
  val FLAGS_SZ = 5
}
|
||||
|
||||
/** Control signals decoded from an FP instruction (see FPUDecoder for the
  * per-instruction table defining each field). */
class FPUCtrlSigs extends Bundle
{
  val cmd = Bits(width = FCMD_WIDTH)  // FCMD_* operation code
  val ldst = Bool()      // FP load/store
  val wen = Bool()       // write-enable for an FP register
  val ren1 = Bool()      // read FP operand 1
  val ren2 = Bool()      // read FP operand 2
  val ren3 = Bool()      // read FP operand 3 (set only for FMA-family ops)
  val swap12 = Bool()    // swap operands 1 and 2
  val swap23 = Bool()    // swap operands 2 and 3
  val single = Bool()    // single-precision operation
  val fromint = Bool()   // operand arrives from the integer register file
  val toint = Bool()     // result is delivered to the integer side
  val fastpipe = Bool()  // FP->FP path (sign-inject, min/max, FP-FP convert per table)
  val fma = Bool()       // uses the fused multiply-add unit
  val div = Bool()
  val sqrt = Bool()
  val round = Bool()     // NOTE(review): set for ops consuming a rounding mode — confirm
  val wflags = Bool()    // op may update the FP exception flags
}
|
||||
|
||||
/** Decodes an FP instruction word into FPUCtrlSigs.  Column order matches
  * the `sigs` list at the bottom: cmd, ldst, wen, ren1, ren2, ren3,
  * swap12, swap23, single, fromint, toint, fastpipe, fma, div, sqrt,
  * round, wflags. */
class FPUDecoder extends Module
{
  val io = new Bundle {
    val inst = Bits(INPUT, 32)
    val sigs = new FPUCtrlSigs().asOutput
  }

  val decoder = DecodeLogic(io.inst,
    List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X),
    Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N,N),
          FLD -> List(FCMD_X, Y,Y,N,N,N,X,X,N,N,N,N,N,N,N,N,N),
          FSW -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,Y,N,Y,N,N,N,N,N,N),
          FSD -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,N,N,Y,N,N,N,N,N,N),
          FMV_S_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,N),
          FMV_D_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,N),
          FCVT_S_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
          FCVT_S_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
          FCVT_S_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
          FCVT_S_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y),
          FCVT_D_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
          FCVT_D_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
          FCVT_D_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
          FCVT_D_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y),
          FMV_X_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
          FMV_X_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
          FCLASS_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N),
          FCLASS_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N),
          FCVT_W_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
          FCVT_WU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
          FCVT_L_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
          FCVT_LU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y),
          FCVT_W_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
          FCVT_WU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
          FCVT_L_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
          FCVT_LU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y),
          FCVT_S_D -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,Y,N,N,Y,N,N,N,Y,Y),
          FCVT_D_S -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,N,N,N,Y,N,N,N,Y,Y),
          FEQ_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
          FLT_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
          FLE_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y),
          FEQ_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
          FLT_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
          FLE_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y),
          FSGNJ_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
          FSGNJN_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
          FSGNJX_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N),
          FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
          FSGNJN_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
          FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N),
          FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
          FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y),
          FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
          FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y),
          FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
          FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y),
          FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,Y,N,N,N,Y,N,N,Y,Y),
          FADD_D -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
          FSUB_D -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y),
          FMUL_D -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y,Y),
          FMADD_S -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
          FMSUB_S -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
          FNMADD_S -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
          FNMSUB_S -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y),
          FMADD_D -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
          FMSUB_D -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
          FNMADD_D -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
          FNMSUB_D -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y),
          FDIV_S -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,Y,N,N,N,N,Y,N,Y,Y),
          FSQRT_S -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,Y,N,N,N,N,N,Y,Y,Y),
          FDIV_D -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,Y,Y),
          FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y)
          ))
  val s = io.sigs
  // Wire each decoded column to the corresponding output signal, in the
  // same order as the table columns above.
  val sigs = Seq(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12,
                 s.swap23, s.single, s.fromint, s.toint, s.fastpipe, s.fma,
                 s.div, s.sqrt, s.round, s.wflags)
  sigs zip decoder map {case(s,d) => s := d}
}
|
||||
|
||||
/** Interface between the core pipeline and the FPU. */
class FPUIO(implicit p: Parameters) extends CoreBundle {
  val inst = Bits(INPUT, 32)
  val fromint_data = Bits(INPUT, xLen)  // integer operand (moves/conversions)

  val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ)  // dynamic rounding mode from fcsr
  val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ))  // exception-flag updates

  val store_data = Bits(OUTPUT, 64)   // raw FP bits for stores
  val toint_data = Bits(OUTPUT, xLen) // result destined for the integer side

  // Memory response channel for FP loads (dmem tag/type/data).
  val dmem_resp_val = Bool(INPUT)
  val dmem_resp_type = Bits(INPUT, 3)
  val dmem_resp_tag = UInt(INPUT, 5)
  val dmem_resp_data = Bits(INPUT, 64)

  val valid = Bool(INPUT)
  val fcsr_rdy = Bool(OUTPUT)
  val nack_mem = Bool(OUTPUT)
  val illegal_rm = Bool(OUTPUT)  // instruction carries an invalid rounding mode
  // Pipeline kill signals for in-flight FP instructions.
  val killx = Bool(INPUT)
  val killm = Bool(INPUT)
  val dec = new FPUCtrlSigs().asOutput  // decoded control signals, for the core
  // Scoreboard set/clear handshake for FP destination-register tracking.
  val sboard_set = Bool(OUTPUT)
  val sboard_clr = Bool(OUTPUT)
  val sboard_clra = UInt(OUTPUT, 5)

  // Coprocessor request/response port.
  val cp_req = Decoupled(new FPInput()).flip //cp doesn't pay attn to kill sigs
  val cp_resp = Decoupled(new FPResult())
}
|
||||
|
||||
/** Result of an FP operation: 65-bit (hardfloat recoded) data plus the
  * five exception flags. */
class FPResult extends Bundle
{
  val data = Bits(width = 65)
  val exc = Bits(width = 5)
}
|
||||
|
||||
/** One FP operation: the decoded control signals plus rounding mode,
  * integer-type selector, and up to three 65-bit recoded operands. */
class FPInput extends FPUCtrlSigs {
  val rm = Bits(width = 3)
  // Integer type for conversions: typ(1) selects 64-bit, typ(0) unsigned
  // (see the d2l/d2w and l2s/l2d wiring in FPToInt / IntToFP).
  val typ = Bits(width = 2)
  val in1 = Bits(width = 65)
  val in2 = Bits(width = 65)
  val in3 = Bits(width = 65)
}
|
||||
|
||||
/** Classify a hardfloat recoded float into ten one-hot categories.
  * Result bits, MSB to LSB: quiet NaN, signaling NaN, +inf, +normal,
  * +subnormal, +zero, -zero, -subnormal, -normal, -inf. */
object ClassifyRecFN {
  def apply(expWidth: Int, sigWidth: Int, in: UInt) = {
    val sign = in(sigWidth + expWidth)
    val exp = in(sigWidth + expWidth - 1, sigWidth - 1)
    val sig = in(sigWidth - 2, 0)

    // The top three exponent bits of the recoded format select the class.
    val code = exp(expWidth,expWidth-2)
    val codeHi = code(2, 1)
    val isSpecial = codeHi === UInt(3)  // inf or NaN

    // Subnormals occupy code 1 plus the lowest exponents of code-group 1.
    val isHighSubnormalIn = exp(expWidth-2, 0) < UInt(2)
    val isSubnormal = code === UInt(1) || codeHi === UInt(1) && isHighSubnormalIn
    val isNormal = codeHi === UInt(1) && !isHighSubnormalIn || codeHi === UInt(2)
    val isZero = code === UInt(0)
    val isInf = isSpecial && !exp(expWidth-2)
    val isNaN = code.andR
    // MSB of the significand distinguishes quiet from signaling NaNs.
    val isSNaN = isNaN && !sig(sigWidth-2)
    val isQNaN = isNaN && sig(sigWidth-2)

    Cat(isQNaN, isSNaN, isInf && !sign, isNormal && !sign,
        isSubnormal && !sign, isZero && !sign, isZero && sign,
        isSubnormal && sign, isNormal && sign, isInf && sign)
  }
}
|
||||
|
||||
/** FP-to-integer path: FP stores, moves/classify to the integer side, FP
  * comparisons, and FP->int conversions.  Internal arithmetic is done in
  * double precision; single-precision inputs are upconverted on the way
  * in (one register stage). */
class FPToInt extends Module
{
  val io = new Bundle {
    val in = Valid(new FPInput).flip
    val as_double = new FPInput().asOutput  // registered, post-upconvert input
    val out = Valid(new Bundle {
      val lt = Bool()              // raw less-than, consumed by FPToFP's min/max
      val store = Bits(width = 64) // un-recoded bits for FP stores
      val toint = Bits(width = 64) // value bound for the integer register file
      val exc = Bits(width = 5)
    })
  }

  // One pipeline register on the input.
  val in = Reg(new FPInput)
  val valid = Reg(next=io.in.valid)

  // Widen a recoded single to a recoded double (rounding mode irrelevant).
  def upconvert(x: UInt) = {
    val s2d = Module(new hardfloat.RecFNToRecFN(8, 24, 11, 53))
    s2d.io.in := x
    s2d.io.roundingMode := UInt(0)
    s2d.io.out
  }

  val in1_upconvert = upconvert(io.in.bits.in1)
  val in2_upconvert = upconvert(io.in.bits.in2)

  when (io.in.valid) {
    in := io.in.bits
    // Single-precision operands are upconverted, except for raw bit moves
    // and stores, which must see the original bits.
    when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd =/= FCMD_MV_XF) {
      in.in1 := in1_upconvert
      in.in2 := in2_upconvert
    }
  }

  // Un-recode operand 1 for stores and integer moves; singles are
  // sign-extended from bit 31 into the upper half.
  val unrec_s = hardfloat.fNFromRecFN(8, 24, in.in1)
  val unrec_d = hardfloat.fNFromRecFN(11, 53, in.in1)
  val unrec_out = Mux(in.single, Cat(Fill(32, unrec_s(31)), unrec_s), unrec_d)

  val classify_s = ClassifyRecFN(8, 24, in.in1)
  val classify_d = ClassifyRecFN(11, 53, in.in1)
  val classify_out = Mux(in.single, classify_s, classify_d)

  // Double-precision comparator; rm's low bits mask which of {lt, eq}
  // contribute to the comparison result.
  val dcmp = Module(new hardfloat.CompareRecFN(11, 53))
  dcmp.io.a := in.in1
  dcmp.io.b := in.in2
  dcmp.io.signaling := Bool(true)
  val dcmp_out = (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR
  val dcmp_exc = dcmp.io.exceptionFlags

  // FP->int converters: typ(1) selects 64-bit, typ(0) selects unsigned.
  val d2l = Module(new hardfloat.RecFNToIN(11, 53, 64))
  val d2w = Module(new hardfloat.RecFNToIN(11, 53, 32))
  d2l.io.in := in.in1
  d2l.io.roundingMode := in.rm
  d2l.io.signedOut := ~in.typ(0)
  d2w.io.in := in.in1
  d2w.io.roundingMode := in.rm
  d2w.io.signedOut := ~in.typ(0)

  // Default outputs: move/classify (rm(0) selects classify vs raw move).
  io.out.bits.toint := Mux(in.rm(0), classify_out, unrec_out)
  io.out.bits.store := unrec_out
  io.out.bits.exc := Bits(0)

  when (in.cmd === FCMD_CMP) {
    io.out.bits.toint := dcmp_out
    io.out.bits.exc := dcmp_exc
  }
  when (in.cmd === FCMD_CVT_IF) {
    io.out.bits.toint := Mux(in.typ(1), d2l.io.out.asSInt, d2w.io.out.asSInt).asUInt
    // Fold the converter's int-exception flags into {invalid, 000, inexact}.
    val dflags = Mux(in.typ(1), d2l.io.intExceptionFlags, d2w.io.intExceptionFlags)
    io.out.bits.exc := Cat(dflags(2, 1).orR, UInt(0, 3), dflags(0))
  }

  io.out.valid := valid
  io.out.bits.lt := dcmp.io.lt
  io.as_double := in
}
|
||||
|
||||
/** Integer-to-FP path: bitwise moves (recode raw integer bits) and
  * int->FP conversions, pipelined over `latency` cycles. */
class IntToFP(val latency: Int) extends Module
{
  val io = new Bundle {
    val in = Valid(new FPInput).flip
    val out = Valid(new FPResult)
  }

  val in = Pipe(io.in)

  val mux = Wire(new FPResult)
  mux.exc := Bits(0)
  // Default: treat the integer bits as an FP value and recode them.
  // Singles are NaN-boxed with all-ones in the upper 32 bits.
  mux.data := hardfloat.recFNFromFN(11, 53, in.bits.in1)
  when (in.bits.single) {
    mux.data := Cat(SInt(-1, 32), hardfloat.recFNFromFN(8, 24, in.bits.in1))
  }

  // Extend the integer operand to 64 bits: typ(1) selects a full 64-bit
  // source; otherwise typ(0) picks zero- vs sign-extension of the low word.
  val longValue =
    Mux(in.bits.typ(1), in.bits.in1.asSInt,
    Mux(in.bits.typ(0), in.bits.in1(31,0).zext, in.bits.in1(31,0).asSInt))
  val l2s = Module(new hardfloat.INToRecFN(64, 8, 24))
  l2s.io.signedIn := ~in.bits.typ(0)
  l2s.io.in := longValue.asUInt
  l2s.io.roundingMode := in.bits.rm

  val l2d = Module(new hardfloat.INToRecFN(64, 11, 53))
  l2d.io.signedIn := ~in.bits.typ(0)
  l2d.io.in := longValue.asUInt
  l2d.io.roundingMode := in.bits.rm

  when (in.bits.cmd === FCMD_CVT_FI) {
    when (in.bits.single) {
      mux.data := Cat(SInt(-1, 32), l2s.io.out)
      mux.exc := l2s.io.exceptionFlags
    }.otherwise {
      mux.data := l2d.io.out
      mux.exc := l2d.io.exceptionFlags
    }
  }

  // Remaining pipeline stages (one was consumed by the input Pipe).
  io.out <> Pipe(in.valid, mux, latency-1)
}
|
||||
|
||||
/** FP-to-FP pipeline: sign injection (FSGNJ/FSGNJN/FSGNJX), FMIN/FMAX
  * selection, and single<->double conversion (FCVT_FF).
  * The min/max comparison result is supplied externally via io.lt
  * (computed by FPToInt's comparator).
  *
  * @param latency number of pipeline stages between input and output
  */
class FPToFP(val latency: Int) extends Module
{
  val io = new Bundle {
    val in = Valid(new FPInput).flip
    val out = Valid(new FPResult)
    val lt = Bool(INPUT) // from FPToInt
  }

  val in = Pipe(io.in)

  // fp->fp units
  val isSgnj = in.bits.cmd === FCMD_SGNJ
  // Compute the injected sign bit at position `pos`; rm distinguishes the
  // SGNJ variants (rm(1): XOR variant, rm(0): negate variant).
  def fsgnjSign(in1: Bits, in2: Bits, pos: Int, en: Bool, rm: Bits) =
    Mux(rm(1) || !en, in1(pos), rm(0)) ^ (en && in2(pos))
  // Sign bit positions: 32 for the recoded single (packed in bits 32..0),
  // 64 for the recoded double — inferred from the Cat below.
  val sign_s = fsgnjSign(in.bits.in1, in.bits.in2, 32, in.bits.single && isSgnj, in.bits.rm)
  val sign_d = fsgnjSign(in.bits.in1, in.bits.in2, 64, !in.bits.single && isSgnj, in.bits.rm)
  val fsgnj = Cat(sign_d, in.bits.in1(63,33), sign_s, in.bits.in1(31,0))

  // Single<->double converters (both run; result selected below).
  val s2d = Module(new hardfloat.RecFNToRecFN(8, 24, 11, 53))
  val d2s = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
  s2d.io.in := in.bits.in1
  s2d.io.roundingMode := in.bits.rm
  d2s.io.in := in.bits.in1
  d2s.io.roundingMode := in.bits.rm

  // NaN detection on the recoded format: top three exponent bits all set.
  val isnan1 = Mux(in.bits.single, in.bits.in1(31,29).andR, in.bits.in1(63,61).andR)
  val isnan2 = Mux(in.bits.single, in.bits.in2(31,29).andR, in.bits.in2(63,61).andR)
  // Signaling NaN: NaN with the top significand bit clear.
  val issnan1 = isnan1 && ~Mux(in.bits.single, in.bits.in1(22), in.bits.in1(51))
  val issnan2 = isnan2 && ~Mux(in.bits.single, in.bits.in2(22), in.bits.in2(51))
  // Min/max raises only NV (invalid), and only for signaling NaN inputs.
  val minmax_exc = Cat(issnan1 || issnan2, Bits(0,4))
  val isMax = in.bits.rm(0)
  // Select operand 1 when operand 2 is NaN, or when the comparison picks it.
  val isLHS = isnan2 || isMax =/= io.lt && !isnan1

  // Default result is min/max selecting operand 2; subsequent `when`
  // connections override it in priority order (Chisel last-connect).
  val mux = Wire(new FPResult)
  mux.exc := minmax_exc
  mux.data := in.bits.in2

  when (isSgnj) { mux.exc := UInt(0) } // sign injection raises no flags
  when (isSgnj || isLHS) { mux.data := fsgnj } // note: fsgnj == in1 when !isSgnj
  when (in.bits.cmd === FCMD_CVT_FF) {
    when (in.bits.single) {
      // double -> single, NaN-boxed in the upper 32 bits
      mux.data := Cat(SInt(-1, 32), d2s.io.out)
      mux.exc := d2s.io.exceptionFlags
    }.otherwise {
      mux.data := s2d.io.out
      mux.exc := s2d.io.exceptionFlags
    }
  }

  // Remaining latency-1 stages after the input register.
  io.out <> Pipe(in.valid, mux, latency-1)
}
|
||||
|
||||
/** Fused multiply-add pipeline wrapping hardfloat's MulAddRecFN.
  * Add/sub and plain multiply are expressed as degenerate FMAs by
  * substituting the constant 1.0 or a signed zero for an operand.
  *
  * @param latency  total pipeline latency (one input stage here + latency-1 in the output Pipe)
  * @param expWidth recoded exponent width (8 for single, 11 for double)
  * @param sigWidth recoded significand width (24 for single, 53 for double)
  */
class FPUFMAPipe(val latency: Int, expWidth: Int, sigWidth: Int) extends Module
{
  val io = new Bundle {
    val in = Valid(new FPInput).flip
    val out = Valid(new FPResult)
  }

  val width = sigWidth + expWidth
  // Recoded constant 1.0: only the top exponent-region bit set.
  val one = UInt(1) << (width-1)
  // Zero whose sign is sign(in1) ^ sign(in2), so x*y + 0 rounds correctly.
  val zero = (io.in.bits.in1(width) ^ io.in.bits.in2(width)) << width

  val valid = Reg(next=io.in.valid)
  val in = Reg(new FPInput)
  when (io.in.valid) {
    // Register the whole request, then patch fields for the degenerate cases.
    // (Field overrides below take effect via Chisel last-connect semantics.)
    in := io.in.bits
    val cmd_fma = io.in.bits.ren3     // true 3-operand FMA
    val cmd_addsub = io.in.bits.swap23 // add/sub encoded with operand swap
    in.cmd := Cat(io.in.bits.cmd(1) & (cmd_fma || cmd_addsub), io.in.bits.cmd(0))
    when (cmd_addsub) { in.in2 := one }          // in1*1 + in3
    unless (cmd_fma || cmd_addsub) { in.in3 := zero } // in1*in2 + (+/-)0
  }

  val fma = Module(new hardfloat.MulAddRecFN(expWidth, sigWidth))
  fma.io.op := in.cmd
  fma.io.roundingMode := in.rm
  fma.io.a := in.in1
  fma.io.b := in.in2
  fma.io.c := in.in3

  val res = Wire(new FPResult)
  // NaN-box narrower results into the 65-bit register format.
  res.data := Cat(SInt(-1, 32), fma.io.out)
  res.exc := fma.io.exceptionFlags
  io.out := Pipe(valid, res, latency-1)
}
|
||||
|
||||
/** Top-level FPU: decodes FP instructions, reads the FP register file,
  * dispatches to the functional units (FMA single/double, int->fp, fp->fp,
  * fp->int, div/sqrt), and arbitrates writeback across pipelines of
  * differing latency. Also services coprocessor (cp_req/cp_resp) requests
  * when the core pipeline is idle.
  */
class FPU(cfg: FPUConfig)(implicit p: Parameters) extends CoreModule()(p) {
  require(xLen == 64, "RV32 Rocket FP support missing")
  val io = new FPUIO

  // --- pipeline valid/instruction tracking (EX -> MEM -> WB) ---
  val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
  val req_valid = ex_reg_valid || io.cp_req.valid
  val ex_reg_inst = RegEnable(io.inst, io.valid)
  // Coprocessor requests slot in only when no core instruction occupies EX.
  val ex_cp_valid = io.cp_req.valid && !ex_reg_valid
  val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx || ex_cp_valid, init=Bool(false))
  val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid)
  val mem_cp_valid = Reg(next=ex_cp_valid, init=Bool(false))
  // Coprocessor ops cannot be killed by the core pipeline.
  val killm = (io.killm || io.nack_mem) && !mem_cp_valid
  val wb_reg_valid = Reg(next=mem_reg_valid && (!killm || mem_cp_valid), init=Bool(false))
  val wb_cp_valid = Reg(next=mem_cp_valid, init=Bool(false))

  val fp_decoder = Module(new FPUDecoder)
  fp_decoder.io.inst := io.inst

  // Coprocessor control signals come pre-decoded in the request bits.
  val cp_ctrl = Wire(new FPUCtrlSigs)
  cp_ctrl <> io.cp_req.bits
  // Default cp response; overridden when a cp result completes.
  io.cp_resp.valid := Bool(false)
  io.cp_resp.bits.data := UInt(0)

  // Control signals staged alongside the instruction.
  val id_ctrl = fp_decoder.io.sigs
  val ex_ctrl = Mux(ex_reg_valid, RegEnable(id_ctrl, io.valid), cp_ctrl)
  val mem_ctrl = RegEnable(ex_ctrl, req_valid)
  val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid)

  // load response
  val load_wb = Reg(next=io.dmem_resp_val)
  val load_wb_single = RegEnable(!io.dmem_resp_type(0), io.dmem_resp_val)
  val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val)
  val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val)
  // Loaded IEEE bits are recoded (and singles NaN-boxed) before regfile write.
  val rec_s = hardfloat.recFNFromFN(8, 24, load_wb_data)
  val rec_d = hardfloat.recFNFromFN(11, 53, load_wb_data)
  val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d)

  // regfile (65-bit recoded format)
  val regfile = Mem(32, Bits(width = 65))
  when (load_wb) {
    regfile(load_wb_tag) := load_wb_data_recoded
    if (enableCommitLog) {
      printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32),
        Mux(load_wb_single, load_wb_data(31,0), load_wb_data))
    }
  }

  // --- operand address capture, honoring the decoder's operand swaps ---
  val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt()))
  when (io.valid) {
    when (id_ctrl.ren1) {
      when (!id_ctrl.swap12) { ex_ra1 := io.inst(19,15) }
      when (id_ctrl.swap12) { ex_ra2 := io.inst(19,15) }
    }
    when (id_ctrl.ren2) {
      when (id_ctrl.swap12) { ex_ra1 := io.inst(24,20) }
      when (id_ctrl.swap23) { ex_ra3 := io.inst(24,20) }
      when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.inst(24,20) }
    }
    when (id_ctrl.ren3) { ex_ra3 := io.inst(31,27) }
  }
  val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_))
  // rm field of 7 selects the dynamic rounding mode from fcsr.
  val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))

  // Coprocessor operands, with the same swap23 convention applied.
  val cp_rs1 = io.cp_req.bits.in1
  val cp_rs2 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in3, io.cp_req.bits.in2)
  val cp_rs3 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in2, io.cp_req.bits.in3)

  // Unified request muxing core vs. coprocessor sources.
  val req = Wire(new FPInput)
  req := ex_ctrl
  req.rm := Mux(ex_reg_valid, ex_rm, io.cp_req.bits.rm)
  req.in1 := Mux(ex_reg_valid, ex_rs1, cp_rs1)
  req.in2 := Mux(ex_reg_valid, ex_rs2, cp_rs2)
  req.in3 := Mux(ex_reg_valid, ex_rs3, cp_rs3)
  req.typ := Mux(ex_reg_valid, ex_reg_inst(21,20), io.cp_req.bits.typ)

  // --- functional units ---
  val sfma = Module(new FPUFMAPipe(cfg.sfmaLatency, 8, 24))
  sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single
  sfma.io.in.bits := req

  val dfma = Module(new FPUFMAPipe(cfg.dfmaLatency, 11, 53))
  dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single
  dfma.io.in.bits := req

  // fp->int also front-ends div/sqrt (operand unrecoding) and min/max (compare).
  val fpiu = Module(new FPToInt)
  fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX)
  fpiu.io.in.bits := req
  io.store_data := fpiu.io.out.bits.store
  io.toint_data := fpiu.io.out.bits.toint
  when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
    io.cp_resp.bits.data := fpiu.io.out.bits.toint
    io.cp_resp.valid := Bool(true)
  }

  val ifpu = Module(new IntToFP(3))
  ifpu.io.in.valid := req_valid && ex_ctrl.fromint
  ifpu.io.in.bits := req
  // Integer operand comes from the core datapath, not the FP regfile.
  ifpu.io.in.bits.in1 := Mux(ex_reg_valid, io.fromint_data, cp_rs1)

  val fpmu = Module(new FPToFP(2))
  fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
  fpmu.io.in.bits := req
  fpmu.io.lt := fpiu.io.out.bits.lt

  // --- div/sqrt bookkeeping (driven in the cfg.divSqrt block below) ---
  val divSqrt_wen = Reg(next=Bool(false))
  val divSqrt_inReady = Wire(init=Bool(false))
  val divSqrt_waddr = Reg(Bits())
  val divSqrt_wdata = Wire(Bits())
  val divSqrt_flags = Wire(Bits())
  val divSqrt_in_flight = Reg(init=Bool(false))
  val divSqrt_killed = Reg(Bool())

  // writeback arbitration
  case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult)
  val pipes = List(
    Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits),
    Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits),
    Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out.bits),
    Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits))
  // One-hot mask with bit (lat-offset) set for the pipe selected by c.
  def latencyMask(c: FPUCtrlSigs, offset: Int) = {
    require(pipes.forall(_.lat >= offset))
    pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_)
  }
  def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), UInt(p._2), UInt(0))).reduce(_|_)
  val maxLatency = pipes.map(_.lat).max
  val memLatencyMask = latencyMask(mem_ctrl, 2)

  // Per-writeback-slot metadata carried alongside the wen shift register.
  class WBInfo extends Bundle {
    val rd = UInt(width = 5)
    val single = Bool()
    val cp = Bool()
    val pipeid = UInt(width = log2Ceil(pipes.size))
    override def cloneType: this.type = new WBInfo().asInstanceOf[this.type]
  }

  // wen is a countdown shift register: bit 0 set means "write back now".
  val wen = Reg(init=Bits(0, maxLatency-1))
  val wbInfo = Reg(Vec(maxLatency-1, new WBInfo))
  val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
  // Structural hazard: an in-flight op would reach the write port in the
  // same cycle as the op currently in EX.
  val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid)

  for (i <- 0 until maxLatency-2) {
    when (wen(i+1)) { wbInfo(i) := wbInfo(i+1) }
  }
  wen := wen >> 1
  when (mem_wen) {
    when (!killm) {
      wen := wen >> 1 | memLatencyMask
    }
    for (i <- 0 until maxLatency-1) {
      when (!write_port_busy && memLatencyMask(i)) {
        wbInfo(i).cp := mem_cp_valid
        wbInfo(i).single := mem_ctrl.single
        wbInfo(i).pipeid := pipeid(mem_ctrl)
        wbInfo(i).rd := mem_reg_inst(11,7)
      }
    }
  }

  // Writeback source select: div/sqrt has priority over the pipes.
  val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd)
  val wdata = Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wbInfo(0).pipeid))
  val wexc = (pipes.map(_.res.exc): Seq[UInt])(wbInfo(0).pipeid)
  when ((!wbInfo(0).cp && wen(0)) || divSqrt_wen) {
    regfile(waddr) := wdata
    if (enableCommitLog) {
      val wdata_unrec_s = hardfloat.fNFromRecFN(8, 24, wdata(64,0))
      val wdata_unrec_d = hardfloat.fNFromRecFN(11, 53, wdata(64,0))
      printf ("f%d p%d 0x%x\n", waddr, waddr+ UInt(32),
        Mux(wbInfo(0).single, Cat(UInt(0,32), wdata_unrec_s), wdata_unrec_d))
    }
  }
  // Coprocessor results return via cp_resp instead of the regfile.
  when (wbInfo(0).cp && wen(0)) {
    io.cp_resp.bits.data := wdata
    io.cp_resp.valid := Bool(true)
  }
  io.cp_req.ready := !ex_reg_valid

  // --- exception-flag accumulation into fcsr ---
  val wb_toint_valid = wb_reg_valid && wb_ctrl.toint
  val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint)
  io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0)
  io.fcsr_flags.bits :=
    Mux(wb_toint_valid, wb_toint_exc, UInt(0)) |
    Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
    Mux(wen(0), wexc, UInt(0))

  val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR) // || mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid))
  io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
  io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
  io.dec <> fp_decoder.io.sigs
  // Long-latency (>3 cycle) pipes and div/sqrt use the core's scoreboard.
  def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
  io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
  io.sboard_clr := !wb_cp_valid && (divSqrt_wen || (wen(0) && useScoreboard(x => wbInfo(0).pipeid === UInt(x._2))))
  io.sboard_clra := waddr
  // we don't currently support round-max-magnitude (rm=4)
  io.illegal_rm := ex_rm(2) && ex_ctrl.round

  // Defaults; overridden inside the cfg.divSqrt block when present.
  divSqrt_wdata := 0
  divSqrt_flags := 0
  if (cfg.divSqrt) {
    val divSqrt_single = Reg(Bool())
    val divSqrt_rm = Reg(Bits())
    val divSqrt_flags_double = Reg(Bits())
    val divSqrt_wdata_double = Reg(Bits())

    // Iterative unit; operands arrive as doubles via FPToInt's as_double view.
    val divSqrt = Module(new hardfloat.DivSqrtRecF64)
    divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div)
    val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt
    divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_in_flight
    divSqrt.io.sqrtOp := mem_ctrl.sqrt
    divSqrt.io.a := fpiu.io.as_double.in1
    divSqrt.io.b := fpiu.io.as_double.in2
    divSqrt.io.roundingMode := fpiu.io.as_double.rm

    when (divSqrt.io.inValid && divSqrt_inReady) {
      divSqrt_in_flight := true
      // A killed op still runs to completion but suppresses writeback.
      divSqrt_killed := killm
      divSqrt_single := mem_ctrl.single
      divSqrt_waddr := mem_reg_inst(11,7)
      divSqrt_rm := divSqrt.io.roundingMode
    }

    when (divSqrt_outValid) {
      divSqrt_wen := !divSqrt_killed
      divSqrt_wdata_double := divSqrt.io.out
      divSqrt_in_flight := false
      divSqrt_flags_double := divSqrt.io.exceptionFlags
    }

    // Single-precision ops compute in double, then round down to single.
    val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
    divSqrt_toSingle.io.in := divSqrt_wdata_double
    divSqrt_toSingle.io.roundingMode := divSqrt_rm
    divSqrt_wdata := Mux(divSqrt_single, divSqrt_toSingle.io.out, divSqrt_wdata_double)
    divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0))
  } else {
    // No div/sqrt hardware: report such instructions as illegal.
    when (ex_ctrl.div || ex_ctrl.sqrt) { io.illegal_rm := true }
  }
}
|
||||
133
src/main/scala/rocket/frontend.scala
Normal file
133
src/main/scala/rocket/frontend.scala
Normal file
@@ -0,0 +1,133 @@
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import Util._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
/** Redirect request from the core to the frontend. */
class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) {
  val pc = UInt(width = vaddrBitsExtended)  // new fetch PC
  val speculative = Bool()                  // redirect target is speculative
}
|
||||
|
||||
/** Fetch packet delivered from the frontend to the core's decode stage. */
class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
  val btb = Valid(new BTBResp)                      // BTB prediction for this packet
  val pc = UInt(width = vaddrBitsExtended)  // ID stage PC
  val data = UInt(width = fetchWidth * coreInstBits) // fetched instruction bits
  val mask = Bits(width = fetchWidth)               // which slots hold valid instructions
  val xcpt_if = Bool()                              // instruction access/page fault
  val replay = Bool()                               // packet must be refetched
}
|
||||
|
||||
/** Core-side interface of the frontend: redirect/fetch channel plus
  * branch-predictor update and flush controls. */
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
  val req = Valid(new FrontendReq)          // redirect fetch to a new PC
  val resp = Decoupled(new FrontendResp).flip // fetched instruction packets
  val btb_update = Valid(new BTBUpdate)
  val bht_update = Valid(new BHTUpdate)
  val ras_update = Valid(new RASUpdate)
  val flush_icache = Bool(OUTPUT)
  val flush_tlb = Bool(OUTPUT)
  val npc = UInt(INPUT, width = vaddrBitsExtended) // next PC the frontend will fetch
}
|
||||
|
||||
/** Instruction fetch frontend: a two-stage (s1 address / s2 response)
  * pipeline around the I$ and ITLB, with optional BTB-based next-PC
  * prediction. s2 replays itself on an I$ miss.
  */
class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
  val io = new Bundle {
    val cpu = new FrontendIO().flip
    val ptw = new TLBPTWIO()
    val mem = new ClientUncachedTileLinkIO
  }

  val icache = Module(new ICache(latency = 2))
  val tlb = Module(new TLB)

  // --- s1 (address) and s2 (response) stage state ---
  val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
  val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline)
  val s1_speculative = Reg(Bool())
  val s1_same_block = Reg(Bool())
  val s2_valid = Reg(init=Bool(true))
  val s2_pc = Reg(init=UInt(p(ResetVector)))
  val s2_btb_resp_valid = Reg(init=Bool(false))
  val s2_btb_resp_bits = Reg(new BTBResp)
  val s2_xcpt_if = Reg(init=Bool(false))
  val s2_speculative = Reg(init=Bool(false))
  val s2_cacheable = Reg(init=Bool(false))

  // Next sequential fetch-packet PC (s1_pc rounded down, plus packet size).
  val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
  // Compares the single rowBytes-aligned address bit to detect staying
  // within the same cache row.
  val ntpc_same_block = (ntpc & rowBytes) === (s1_pc & rowBytes)
  val predicted_npc = Wire(init = ntpc)
  val predicted_taken = Wire(init = Bool(false))
  val icmiss = s2_valid && !icache.io.resp.valid
  // On a miss, refetch s2's PC; otherwise follow the prediction.
  val npc = Mux(icmiss, s2_pc, predicted_npc)
  val s0_same_block = !predicted_taken && !icmiss && !io.cpu.req.valid && ntpc_same_block

  val stall = io.cpu.resp.valid && !io.cpu.resp.ready
  when (!stall) {
    s1_same_block := s0_same_block && !tlb.io.resp.miss
    s1_pc_ := io.cpu.npc
    // consider RVC fetches across blocks to be non-speculative if the first
    // part was non-speculative
    val s0_speculative =
      if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken
      else Bool(true)
    s1_speculative := Mux(icmiss, s2_speculative, s0_speculative)
    s2_valid := !icmiss
    when (!icmiss) {
      s2_pc := s1_pc
      s2_speculative := s1_speculative
      s2_cacheable := tlb.io.resp.cacheable
      s2_xcpt_if := tlb.io.resp.xcpt_if
    }
  }
  // Core redirect overrides the sequential/predicted path (last-connect).
  when (io.cpu.req.valid) {
    s1_same_block := Bool(false)
    s1_pc_ := io.cpu.npc
    s1_speculative := io.cpu.req.bits.speculative
    s2_valid := Bool(false)
  }

  // Optional BTB: predicts taken branches from the s1 PC.
  if (p(BtbKey).nEntries > 0) {
    val btb = Module(new BTB)
    btb.io.req.valid := false
    btb.io.req.bits.addr := s1_pc_
    btb.io.btb_update := io.cpu.btb_update
    btb.io.bht_update := io.cpu.bht_update
    btb.io.ras_update := io.cpu.ras_update
    when (!stall && !icmiss) {
      btb.io.req.valid := true
      s2_btb_resp_valid := btb.io.resp.valid
      s2_btb_resp_bits := btb.io.resp.bits
    }
    when (btb.io.resp.valid && btb.io.resp.bits.taken) {
      predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended)
      predicted_taken := Bool(true)
    }
  }

  // --- ITLB hookup ---
  io.ptw <> tlb.io.ptw
  tlb.io.req.valid := !stall && !icmiss
  tlb.io.req.bits.vpn := s1_pc >> pgIdxBits
  tlb.io.req.bits.passthrough := Bool(false)
  tlb.io.req.bits.instruction := Bool(true)
  tlb.io.req.bits.store := Bool(false)

  // --- I$ hookup ---
  io.mem <> icache.io.mem
  // Skip the I$ lookup when fetching within the same row as last cycle.
  icache.io.req.valid := !stall && !s0_same_block
  icache.io.req.bits.addr := io.cpu.npc
  icache.io.invalidate := io.cpu.flush_icache
  icache.io.s1_ppn := tlb.io.resp.ppn
  icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb
  // Don't emit a refill for speculative fetches to uncacheable space.
  icache.io.s2_kill := s2_speculative && !s2_cacheable
  icache.io.resp.ready := !stall && !s1_same_block

  io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill || s2_xcpt_if)
  io.cpu.resp.bits.pc := s2_pc
  io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)

  require(fetchWidth * coreInstBytes <= rowBytes && isPow2(fetchWidth))
  // Select this packet's instruction bits out of the full I$ row.
  io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc.extract(log2Ceil(rowBytes)-1,log2Ceil(fetchWidth*coreInstBytes)) << log2Ceil(fetchWidth*coreInstBits))
  io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
  io.cpu.resp.bits.xcpt_if := s2_xcpt_if
  io.cpu.resp.bits.replay := icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt_if
  io.cpu.resp.bits.btb.valid := s2_btb_resp_valid
  io.cpu.resp.bits.btb.bits := s2_btb_resp_bits
}
|
||||
132
src/main/scala/rocket/ibuf.scala
Normal file
132
src/main/scala/rocket/ibuf.scala
Normal file
@@ -0,0 +1,132 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import Util._
|
||||
import cde.{Parameters, Field}
|
||||
import junctions._
|
||||
|
||||
/** One decoded-fetch slot handed from the instruction buffer to decode. */
class Instruction(implicit val p: Parameters) extends ParameterizedBundle with HasCoreParameters {
  val pf0 = Bool() // page fault on first half of instruction
  val pf1 = Bool() // page fault on second half of instruction
  val replay = Bool()   // instruction must be refetched
  val btb_hit = Bool()  // a BTB prediction covered this instruction
  val rvc = Bool()      // instruction was a 16-bit compressed encoding
  val inst = new ExpandedInstruction
  require(coreInstBits == (if (usingCompressed) 16 else 32))
}
|
||||
|
||||
/** Instruction buffer between the frontend and decode. Holds the unconsumed
  * tail of the previous fetch packet so that (with RVC) an instruction
  * straddling two packets can be assembled, and expands compressed
  * instructions via RVCExpander.
  */
class IBuf(implicit p: Parameters) extends CoreModule {
  val io = new Bundle {
    val imem = Decoupled(new FrontendResp).flip
    val kill = Bool(INPUT)
    val pc = UInt(width = vaddrBitsExtended)
    val btb_resp = new BTBResp().asOutput
    val inst = Vec(retireWidth, Decoupled(new Instruction))
  }

  // This module is meant to be more general, but it's not there yet
  require(decodeWidth == 1)

  // n = max instruction slots the buffer can hold (one packet minus one).
  val n = fetchWidth - 1
  val nBufValid = if (n == 0) UInt(0) else Reg(init=UInt(0, log2Ceil(fetchWidth)))
  val buf = Reg(io.imem.bits)         // buffered tail of the previous packet
  val ibufBTBHit = Reg(Bool())
  val ibufBTBResp = Reg(new BTBResp)
  val pcWordMask = UInt(coreInstBytes*fetchWidth-1, vaddrBitsExtended)

  // Slot index of the packet PC within its fetch group.
  val pcWordBits = io.imem.bits.pc.extract(log2Ceil(fetchWidth*coreInstBytes)-1, log2Ceil(coreInstBytes))
  // nReady: slots consumed this cycle (driven by expand() below).
  val nReady = Wire(init = UInt(0, log2Ceil(fetchWidth+1)))
  // nIC: valid slots offered by the incoming packet (truncated at a
  // predicted-taken branch).
  val nIC = Mux(io.imem.bits.btb.valid && io.imem.bits.btb.bits.taken, io.imem.bits.btb.bits.bridx +& 1, UInt(fetchWidth)) - pcWordBits
  val nICReady = nReady - nBufValid
  val nValid = Mux(io.imem.valid, nIC, UInt(0)) + nBufValid
  // Accept the packet when the buffer drains and the leftover fits.
  io.imem.ready := nReady >= nBufValid && (nICReady >= nIC || n >= nIC - nICReady)

  if (n > 0) {
    nBufValid := Mux(nReady >= nBufValid, UInt(0), nBufValid - nReady)
    // Partial consume: shift the buffer down by the consumed count.
    if (n > 1) when (nReady > 0 && nReady < nBufValid) {
      val shiftedBuf = shiftInsnRight(buf.data(n*coreInstBits-1, coreInstBits), (nReady-1)(log2Ceil(n-1)-1,0))
      buf.data := Cat(buf.data(n*coreInstBits-1, (n-1)*coreInstBits), shiftedBuf((n-1)*coreInstBits-1, 0))
      buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask
      ibufBTBResp.bridx := ibufBTBResp.bridx - nReady
    }
    // Refill: stash the unconsumed tail of the incoming packet.
    when (io.imem.valid && nReady >= nBufValid && nICReady < nIC && n >= nIC - nICReady) {
      val shamt = pcWordBits + nICReady
      nBufValid := nIC - nICReady
      buf := io.imem.bits
      buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n*coreInstBits-1,0)
      buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask
      ibufBTBHit := io.imem.bits.btb.valid
      when (io.imem.bits.btb.valid) {
        ibufBTBResp := io.imem.bits.btb.bits
        ibufBTBResp.bridx := io.imem.bits.btb.bits.bridx + nICReady
      }
    }
    when (io.kill) {
      nBufValid := 0
    }
  }

  // Concatenate buffered slots (low end) with incoming packet slots.
  val icShiftAmt = (fetchWidth + nBufValid - pcWordBits)(log2Ceil(fetchWidth), 0)
  val icData = shiftInsnLeft(Cat(io.imem.bits.data, Fill(fetchWidth, io.imem.bits.data(coreInstBits-1, 0))), icShiftAmt)
    .extract(3*fetchWidth*coreInstBits-1, 2*fetchWidth*coreInstBits)
  val icMask = (~UInt(0, fetchWidth*coreInstBits) << (nBufValid << log2Ceil(coreInstBits)))(fetchWidth*coreInstBits-1,0)
  val inst = icData & icMask | buf.data & ~icMask

  // Per-slot status masks, split between buffered and incoming halves.
  val valid = (UIntToOH(nValid) - 1)(fetchWidth-1, 0)
  val bufMask = UIntToOH(nBufValid) - 1
  val xcpt_if = valid & (Mux(buf.xcpt_if, bufMask, UInt(0)) | Mux(io.imem.bits.xcpt_if, ~bufMask, UInt(0)))
  val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0)))
  val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0))
  val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0))
  val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask

  io.btb_resp := Mux((ibufBTBHitMask & bufMask).orR, ibufBTBResp, io.imem.bits.btb.bits)
  io.pc := Mux(nBufValid > 0, buf.pc, io.imem.bits.pc)
  expand(0, 0, inst)

  /** Statically unrolled expansion of up to retireWidth instructions:
    * slot i starts at 16-bit-unit offset j of curInst. nReady is driven
    * to the number of 16-bit units actually consumed. */
  def expand(i: Int, j: UInt, curInst: UInt): Unit = if (i < retireWidth) {
    val exp = Module(new RVCExpander)
    exp.io.in := curInst
    io.inst(i).bits.inst := exp.io.out

    if (usingCompressed) {
      // A 32-bit instruction whose first half carries a BTB hit or whose
      // second half must be replayed forces a replay of the whole thing.
      val replay = ic_replay(j) || (!exp.io.rvc && (btbHitMask(j) || ic_replay(j+1)))
      io.inst(i).valid := valid(j) && (exp.io.rvc || valid(j+1) || xcpt_if(j+1) || replay)
      io.inst(i).bits.pf0 := xcpt_if(j)
      io.inst(i).bits.pf1 := !exp.io.rvc && xcpt_if(j+1)
      io.inst(i).bits.replay := replay
      io.inst(i).bits.btb_hit := btbHitMask(j) || (!exp.io.rvc && btbHitMask(j+1))
      io.inst(i).bits.rvc := exp.io.rvc

      when (io.inst(i).fire()) { nReady := Mux(exp.io.rvc, j+1, j+2) }

      expand(i+1, Mux(exp.io.rvc, j+1, j+2), Mux(exp.io.rvc, curInst >> 16, curInst >> 32))
    } else {
      when (io.inst(i).ready) { nReady := i+1 }
      io.inst(i).valid := valid(i)
      io.inst(i).bits.pf0 := xcpt_if(i)
      io.inst(i).bits.pf1 := false
      io.inst(i).bits.replay := ic_replay(i)
      io.inst(i).bits.rvc := false
      io.inst(i).bits.btb_hit := btbHitMask(i)

      expand(i+1, null, curInst >> 32)
    }
  }

  /** Shift left by `dist` instruction units, replicating the top unit to
    * pad the width up to the next power of two. */
  def shiftInsnLeft(in: UInt, dist: UInt) = {
    val r = in.getWidth/coreInstBits
    require(in.getWidth % coreInstBits == 0)
    val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r-1)*coreInstBits), in)
    data << (dist << log2Ceil(coreInstBits))
  }

  /** Shift right by `dist` instruction units, with the same padding. */
  def shiftInsnRight(in: UInt, dist: UInt) = {
    val r = in.getWidth/coreInstBits
    require(in.getWidth % coreInstBits == 0)
    val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r-1)*coreInstBits), in)
    data >> (dist << log2Ceil(coreInstBits))
  }
}
|
||||
157
src/main/scala/rocket/icache.scala
Normal file
157
src/main/scala/rocket/icache.scala
Normal file
@@ -0,0 +1,157 @@
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import uncore.agents._
|
||||
import uncore.tilelink._
|
||||
import uncore.util._
|
||||
import Util._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
/** Derived geometry parameters shared by the L1 caches: how many beats the
  * outer TileLink port delivers per refill and how that maps onto cache rows. */
trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters {
  val outerDataBeats = p(TLKey(p(TLId))).dataBeats
  val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat
  // cache-row writes needed per TileLink beat, and per full refill
  val refillCyclesPerBeat = outerDataBits/rowBits
  val refillCycles = refillCyclesPerBeat*outerDataBeats
}
|
||||
|
||||
/** I$ lookup request: the virtual fetch address. */
class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters {
  val addr = UInt(width = vaddrBits)
}
|
||||
|
||||
/** I$ response: a full cache row (datablock); `data` is narrower
  * (single-instruction width). */
class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters {
  val data = Bits(width = coreInstBits)
  val datablock = Bits(width = rowBits)
}
|
||||
|
||||
/** Blocking, virtually-indexed/physically-tagged instruction cache with a
  * simple 4-state refill FSM and ECC-protected tag/data arrays.
  *
  * @param latency response latency in cycles (1 or 2; see the match below)
  */
class ICache(latency: Int)(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
  val io = new Bundle {
    val req = Valid(new ICacheReq).flip
    val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req
    val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
    val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission

    val resp = Decoupled(new ICacheResp)
    val invalidate = Bool(INPUT)
    val mem = new ClientUncachedTileLinkIO
  }
  require(isPow2(nSets) && isPow2(nWays))
  require(isPow2(coreInstBytes))
  require(!usingVM || pgIdxBits >= untagBits)

  // Refill FSM states.
  val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(UInt(), 4)
  val state = Reg(init=s_ready)
  val invalidated = Reg(Bool()) // invalidate arrived during an active refill
  val stall = !io.resp.ready
  val rdy = Wire(Bool())

  val refill_addr = Reg(UInt(width = paddrBits))
  val s1_any_tag_hit = Wire(Bool())

  // --- s1 stage: tag compare against the translated address ---
  val s1_valid = Reg(init=Bool(false))
  val s1_vaddr = Reg(UInt())
  val s1_paddr = Cat(io.s1_ppn, s1_vaddr(pgIdxBits-1,0))
  val s1_tag = s1_paddr(tagBits+untagBits-1,untagBits)

  // s0: replay the s1 address while stalled, else accept the new request.
  val s0_valid = io.req.valid || s1_valid && stall
  val s0_vaddr = Mux(s1_valid && stall, s1_vaddr, io.req.bits.addr)

  s1_valid := io.req.valid && rdy || s1_valid && stall && !io.s1_kill
  when (io.req.valid && rdy) {
    s1_vaddr := io.req.bits.addr
  }

  val out_valid = s1_valid && !io.s1_kill && state === s_ready
  val s1_idx = s1_vaddr(untagBits-1,blockOffBits)
  val s1_hit = out_valid && s1_any_tag_hit
  val s1_miss = out_valid && !s1_any_tag_hit
  rdy := state === s_ready && !s1_miss

  when (s1_miss && state === s_ready) {
    refill_addr := s1_paddr
  }
  val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)

  // Serialize wide TileLink grant beats down to row-width writes.
  val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat)
  val (refill_cnt, refill_wrap) = Counter(narrow_grant.fire(), refillCycles)
  val refill_done = state === s_refill && refill_wrap
  narrow_grant.ready := Bool(true)

  // Random replacement (LFSR); way 0 when direct-mapped.
  val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0)
  val entagbits = code.width(tagBits)
  val tag_array = SeqMem(nSets, Vec(nWays, Bits(width = entagbits)))
  val tag_rdata = tag_array.read(s0_vaddr(untagBits-1,blockOffBits), !refill_done && s0_valid)
  when (refill_done) {
    val tag = code.encode(refill_tag)
    tag_array.write(s1_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _))
  }

  // Valid bits, one per (way, set); flash-cleared on invalidate.
  val vb_array = Reg(init=Bits(0, nSets*nWays))
  when (refill_done && !invalidated) {
    vb_array := vb_array.bitSet(Cat(repl_way, s1_idx), Bool(true))
  }
  when (io.invalidate) {
    vb_array := Bits(0)
    invalidated := Bool(true)
  }
  // An ECC disparity invalidates the offending line so it gets refetched.
  val s1_disparity = Wire(Vec(nWays, Bool()))
  for (i <- 0 until nWays)
    when (s1_valid && s1_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s1_idx), Bool(false)) }

  val s1_tag_match = Wire(Vec(nWays, Bool()))
  val s1_tag_hit = Wire(Vec(nWays, Bool()))
  val s1_dout = Wire(Vec(nWays, Bits(width = rowBits)))

  for (i <- 0 until nWays) {
    val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_vaddr(untagBits-1,blockOffBits))).toBool
    val tag_out = tag_rdata(i)
    val s1_tag_disparity = code.decode(tag_out).error
    s1_tag_match(i) := tag_out(tagBits-1,0) === s1_tag
    s1_tag_hit(i) := s1_vb && s1_tag_match(i)
    s1_disparity(i) := s1_vb && (s1_tag_disparity || code.decode(s1_dout(i)).error)
  }
  // A detected ECC error suppresses the hit, forcing a miss/refill.
  s1_any_tag_hit := s1_tag_hit.reduceLeft(_||_) && !s1_disparity.reduceLeft(_||_)

  // One data SRAM per way, written one row per narrow grant beat.
  for (i <- 0 until nWays) {
    val data_array = SeqMem(nSets * refillCycles, Bits(width = code.width(rowBits)))
    val wen = narrow_grant.valid && repl_way === UInt(i)
    when (wen) {
      val e_d = code.encode(narrow_grant.bits.data)
      data_array.write((s1_idx << log2Ceil(refillCycles)) | refill_cnt, e_d)
    }
    val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
    s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid)
  }

  // output signals
  latency match {
    case 1 =>
      io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
      io.resp.valid := s1_hit
    case 2 =>
      // Extra register stage for timing; held while stalled.
      val s2_hit = RegEnable(s1_hit, !stall)
      val s2_tag_hit = RegEnable(s1_tag_hit, !stall)
      val s2_dout = RegEnable(s1_dout, !stall)
      io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout)
      io.resp.valid := s2_hit
  }
  io.mem.acquire.valid := state === s_request && !io.s2_kill
  io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits)

  // control state machine
  switch (state) {
    is (s_ready) {
      when (s1_miss) { state := s_request }
      invalidated := Bool(false)
    }
    is (s_request) {
      when (io.mem.acquire.ready) { state := s_refill_wait }
      when (io.s2_kill) { state := s_ready } // abort before the acquire is sent
    }
    is (s_refill_wait) {
      when (io.mem.grant.valid) { state := s_refill }
    }
    is (s_refill) {
      when (refill_done) { state := s_ready }
    }
  }
}
|
||||
314
src/main/scala/rocket/idecode.scala
Normal file
314
src/main/scala/rocket/idecode.scala
Normal file
@@ -0,0 +1,314 @@
|
||||
// See LICENSE for license details
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import Instructions._
|
||||
import uncore.constants.MemoryOpConstants._
|
||||
import ALU._
|
||||
import cde.Parameters
|
||||
import Util._
|
||||
|
||||
abstract trait DecodeConstants extends HasCoreParameters
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])]
|
||||
}
|
||||
|
||||
class IntCtrlSigs extends Bundle {
|
||||
val legal = Bool()
|
||||
val fp = Bool()
|
||||
val rocc = Bool()
|
||||
val branch = Bool()
|
||||
val jal = Bool()
|
||||
val jalr = Bool()
|
||||
val rxs2 = Bool()
|
||||
val rxs1 = Bool()
|
||||
val sel_alu2 = Bits(width = A2_X.getWidth)
|
||||
val sel_alu1 = Bits(width = A1_X.getWidth)
|
||||
val sel_imm = Bits(width = IMM_X.getWidth)
|
||||
val alu_dw = Bool()
|
||||
val alu_fn = Bits(width = FN_X.getWidth)
|
||||
val mem = Bool()
|
||||
val mem_cmd = Bits(width = M_SZ)
|
||||
val mem_type = Bits(width = MT_SZ)
|
||||
val rfs1 = Bool()
|
||||
val rfs2 = Bool()
|
||||
val rfs3 = Bool()
|
||||
val wfd = Bool()
|
||||
val div = Bool()
|
||||
val wxd = Bool()
|
||||
val csr = Bits(width = CSR.SZ)
|
||||
val fence_i = Bool()
|
||||
val fence = Bool()
|
||||
val amo = Bool()
|
||||
|
||||
def default: List[BitPat] =
|
||||
// jal renf1 fence.i
|
||||
// val | jalr | renf2 |
|
||||
// | fp_val| | renx2 | | renf3 |
|
||||
// | | rocc| | | renx1 s_alu1 mem_val | | | wfd |
|
||||
// | | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div |
|
||||
// | | | | | | | | | | | | | | | | | | | | | wxd | fence
|
||||
// | | | | | | | | | | | | | | | | | | | | | | csr | | amo
|
||||
// | | | | | | | | | | | | | | | | | | | | | | | | | |
|
||||
List(N,X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X)
|
||||
|
||||
def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = {
|
||||
val decoder = DecodeLogic(inst, default, table)
|
||||
val sigs = Seq(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2,
|
||||
sel_alu1, sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type,
|
||||
rfs1, rfs2, rfs3, wfd, div, wxd, csr, fence_i, fence, amo)
|
||||
sigs zip decoder map {case(s,d) => s := d}
|
||||
this
|
||||
}
|
||||
}
|
||||
|
||||
class IDecode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
BNE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
BEQ-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
BLT-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
BLTU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
BGE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
BGEU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
|
||||
JAL-> List(Y,N,N,N,Y,N,N,N,A2_SIZE,A1_PC, IMM_UJ,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
JALR-> List(Y,N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
AUIPC-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
|
||||
LB-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
LH-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
LW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
LBU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
LHU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SB-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
SH-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
SW-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
|
||||
LUI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
ADDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SLTI -> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SLTIU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
ANDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
ORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
XORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SLLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SRLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SRAI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
ADD-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SUB-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SLT-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SLTU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
AND-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
OR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
XOR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SLL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SRL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SRA-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
|
||||
FENCE-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N),
|
||||
FENCE_I-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FLUSH_ALL,MT_X, N,N,N,N,N,N,CSR.N,Y,N,N),
|
||||
|
||||
SCALL-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
|
||||
SBREAK-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
|
||||
MRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
|
||||
WFI-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
|
||||
CSRRW-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N),
|
||||
CSRRS-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N),
|
||||
CSRRC-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N),
|
||||
CSRRWI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N),
|
||||
CSRRSI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N),
|
||||
CSRRCI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N))
|
||||
}
|
||||
|
||||
class SDecode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
SFENCE_VM-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N),
|
||||
SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N))
|
||||
}
|
||||
|
||||
class DebugDecode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
DRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N))
|
||||
}
|
||||
|
||||
class I64Decode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
LD-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
LWU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SD-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
|
||||
ADDIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SLLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SRLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SRAIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
ADDW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SUBW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SLLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SRLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
SRAW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N))
|
||||
}
|
||||
|
||||
class MDecode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
MUL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
|
||||
MULH-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
|
||||
MULHU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
|
||||
MULHSU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
|
||||
|
||||
DIV-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
|
||||
DIVU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
|
||||
REM-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
|
||||
REMU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N))
|
||||
}
|
||||
|
||||
class M64Decode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
MULW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
|
||||
|
||||
DIVW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
|
||||
DIVUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
|
||||
REMW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N),
|
||||
REMUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N))
|
||||
}
|
||||
|
||||
class ADecode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
AMOADD_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOXOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOSWAP_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOAND_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOMIN_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOMINU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOMAX_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOMAXU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
|
||||
LR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
SC_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y))
|
||||
}
|
||||
|
||||
class A64Decode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
AMOADD_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOSWAP_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOXOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOAND_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOMIN_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOMINU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOMAX_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
AMOMAXU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
|
||||
LR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y),
|
||||
SC_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y))
|
||||
}
|
||||
|
||||
class FDecode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
FCVT_S_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FCVT_D_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FSGNJ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FSGNJ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FSGNJX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FSGNJX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FSGNJN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FSGNJN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FMIN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FMIN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FMAX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FMAX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FMUL_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FMUL_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
|
||||
FMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
|
||||
FMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
|
||||
FMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
|
||||
FNMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
|
||||
FNMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
|
||||
FNMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
|
||||
FNMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N),
|
||||
FCLASS_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FCLASS_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FMV_X_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FCVT_W_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FCVT_W_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FCVT_WU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FCVT_WU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FEQ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
|
||||
FEQ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
|
||||
FLT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
|
||||
FLT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
|
||||
FLE_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
|
||||
FLE_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N),
|
||||
FMV_S_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FCVT_S_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FCVT_D_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FCVT_S_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FCVT_D_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FLW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FLD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FSW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N),
|
||||
FSD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N))
|
||||
}
|
||||
|
||||
class F64Decode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
FMV_X_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FCVT_L_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FCVT_L_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FCVT_LU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FCVT_LU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
FMV_D_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FCVT_S_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FCVT_D_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FCVT_S_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FCVT_D_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N),
|
||||
FDIV_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FDIV_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FSQRT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N),
|
||||
FSQRT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N))
|
||||
}
|
||||
|
||||
class RoCCDecode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
CUSTOM0-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM0_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM0_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM0_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
CUSTOM0_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
CUSTOM0_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
CUSTOM1-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM1_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM1_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM1_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
CUSTOM1_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
CUSTOM1_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
CUSTOM2-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM2_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM2_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM2_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
CUSTOM2_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
CUSTOM2_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
CUSTOM3-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM3_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM3_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
|
||||
CUSTOM3_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
CUSTOM3_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
|
||||
CUSTOM3_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N))
|
||||
}
|
||||
383
src/main/scala/rocket/instructions.scala
Normal file
383
src/main/scala/rocket/instructions.scala
Normal file
@@ -0,0 +1,383 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
|
||||
/* Automatically generated by parse-opcodes */
|
||||
object Instructions {
|
||||
def BEQ = BitPat("b?????????????????000?????1100011")
|
||||
def BNE = BitPat("b?????????????????001?????1100011")
|
||||
def BLT = BitPat("b?????????????????100?????1100011")
|
||||
def BGE = BitPat("b?????????????????101?????1100011")
|
||||
def BLTU = BitPat("b?????????????????110?????1100011")
|
||||
def BGEU = BitPat("b?????????????????111?????1100011")
|
||||
def JALR = BitPat("b?????????????????000?????1100111")
|
||||
def JAL = BitPat("b?????????????????????????1101111")
|
||||
def LUI = BitPat("b?????????????????????????0110111")
|
||||
def AUIPC = BitPat("b?????????????????????????0010111")
|
||||
def ADDI = BitPat("b?????????????????000?????0010011")
|
||||
def SLLI = BitPat("b000000???????????001?????0010011")
|
||||
def SLTI = BitPat("b?????????????????010?????0010011")
|
||||
def SLTIU = BitPat("b?????????????????011?????0010011")
|
||||
def XORI = BitPat("b?????????????????100?????0010011")
|
||||
def SRLI = BitPat("b000000???????????101?????0010011")
|
||||
def SRAI = BitPat("b010000???????????101?????0010011")
|
||||
def ORI = BitPat("b?????????????????110?????0010011")
|
||||
def ANDI = BitPat("b?????????????????111?????0010011")
|
||||
def ADD = BitPat("b0000000??????????000?????0110011")
|
||||
def SUB = BitPat("b0100000??????????000?????0110011")
|
||||
def SLL = BitPat("b0000000??????????001?????0110011")
|
||||
def SLT = BitPat("b0000000??????????010?????0110011")
|
||||
def SLTU = BitPat("b0000000??????????011?????0110011")
|
||||
def XOR = BitPat("b0000000??????????100?????0110011")
|
||||
def SRL = BitPat("b0000000??????????101?????0110011")
|
||||
def SRA = BitPat("b0100000??????????101?????0110011")
|
||||
def OR = BitPat("b0000000??????????110?????0110011")
|
||||
def AND = BitPat("b0000000??????????111?????0110011")
|
||||
def ADDIW = BitPat("b?????????????????000?????0011011")
|
||||
def SLLIW = BitPat("b0000000??????????001?????0011011")
|
||||
def SRLIW = BitPat("b0000000??????????101?????0011011")
|
||||
def SRAIW = BitPat("b0100000??????????101?????0011011")
|
||||
def ADDW = BitPat("b0000000??????????000?????0111011")
|
||||
def SUBW = BitPat("b0100000??????????000?????0111011")
|
||||
def SLLW = BitPat("b0000000??????????001?????0111011")
|
||||
def SRLW = BitPat("b0000000??????????101?????0111011")
|
||||
def SRAW = BitPat("b0100000??????????101?????0111011")
|
||||
def LB = BitPat("b?????????????????000?????0000011")
|
||||
def LH = BitPat("b?????????????????001?????0000011")
|
||||
def LW = BitPat("b?????????????????010?????0000011")
|
||||
def LD = BitPat("b?????????????????011?????0000011")
|
||||
def LBU = BitPat("b?????????????????100?????0000011")
|
||||
def LHU = BitPat("b?????????????????101?????0000011")
|
||||
def LWU = BitPat("b?????????????????110?????0000011")
|
||||
def SB = BitPat("b?????????????????000?????0100011")
|
||||
def SH = BitPat("b?????????????????001?????0100011")
|
||||
def SW = BitPat("b?????????????????010?????0100011")
|
||||
def SD = BitPat("b?????????????????011?????0100011")
|
||||
def FENCE = BitPat("b?????????????????000?????0001111")
|
||||
def FENCE_I = BitPat("b?????????????????001?????0001111")
|
||||
def MUL = BitPat("b0000001??????????000?????0110011")
|
||||
def MULH = BitPat("b0000001??????????001?????0110011")
|
||||
def MULHSU = BitPat("b0000001??????????010?????0110011")
|
||||
def MULHU = BitPat("b0000001??????????011?????0110011")
|
||||
def DIV = BitPat("b0000001??????????100?????0110011")
|
||||
def DIVU = BitPat("b0000001??????????101?????0110011")
|
||||
def REM = BitPat("b0000001??????????110?????0110011")
|
||||
def REMU = BitPat("b0000001??????????111?????0110011")
|
||||
def MULW = BitPat("b0000001??????????000?????0111011")
|
||||
def DIVW = BitPat("b0000001??????????100?????0111011")
|
||||
def DIVUW = BitPat("b0000001??????????101?????0111011")
|
||||
def REMW = BitPat("b0000001??????????110?????0111011")
|
||||
def REMUW = BitPat("b0000001??????????111?????0111011")
|
||||
def AMOADD_W = BitPat("b00000????????????010?????0101111")
|
||||
def AMOXOR_W = BitPat("b00100????????????010?????0101111")
|
||||
def AMOOR_W = BitPat("b01000????????????010?????0101111")
|
||||
def AMOAND_W = BitPat("b01100????????????010?????0101111")
|
||||
def AMOMIN_W = BitPat("b10000????????????010?????0101111")
|
||||
def AMOMAX_W = BitPat("b10100????????????010?????0101111")
|
||||
def AMOMINU_W = BitPat("b11000????????????010?????0101111")
|
||||
def AMOMAXU_W = BitPat("b11100????????????010?????0101111")
|
||||
def AMOSWAP_W = BitPat("b00001????????????010?????0101111")
|
||||
def LR_W = BitPat("b00010??00000?????010?????0101111")
|
||||
def SC_W = BitPat("b00011????????????010?????0101111")
|
||||
def AMOADD_D = BitPat("b00000????????????011?????0101111")
|
||||
def AMOXOR_D = BitPat("b00100????????????011?????0101111")
|
||||
def AMOOR_D = BitPat("b01000????????????011?????0101111")
|
||||
def AMOAND_D = BitPat("b01100????????????011?????0101111")
|
||||
def AMOMIN_D = BitPat("b10000????????????011?????0101111")
|
||||
def AMOMAX_D = BitPat("b10100????????????011?????0101111")
|
||||
def AMOMINU_D = BitPat("b11000????????????011?????0101111")
|
||||
def AMOMAXU_D = BitPat("b11100????????????011?????0101111")
|
||||
def AMOSWAP_D = BitPat("b00001????????????011?????0101111")
|
||||
def LR_D = BitPat("b00010??00000?????011?????0101111")
|
||||
def SC_D = BitPat("b00011????????????011?????0101111")
|
||||
def ECALL = BitPat("b00000000000000000000000001110011")
|
||||
def EBREAK = BitPat("b00000000000100000000000001110011")
|
||||
def URET = BitPat("b00000000001000000000000001110011")
|
||||
def SRET = BitPat("b00010000001000000000000001110011")
|
||||
def HRET = BitPat("b00100000001000000000000001110011")
|
||||
def MRET = BitPat("b00110000001000000000000001110011")
|
||||
def DRET = BitPat("b01111011001000000000000001110011")
|
||||
def SFENCE_VM = BitPat("b000100000100?????000000001110011")
|
||||
def WFI = BitPat("b00010000010100000000000001110011")
|
||||
def CSRRW = BitPat("b?????????????????001?????1110011")
|
||||
def CSRRS = BitPat("b?????????????????010?????1110011")
|
||||
def CSRRC = BitPat("b?????????????????011?????1110011")
|
||||
def CSRRWI = BitPat("b?????????????????101?????1110011")
|
||||
def CSRRSI = BitPat("b?????????????????110?????1110011")
|
||||
def CSRRCI = BitPat("b?????????????????111?????1110011")
|
||||
def FADD_S = BitPat("b0000000??????????????????1010011")
|
||||
def FSUB_S = BitPat("b0000100??????????????????1010011")
|
||||
def FMUL_S = BitPat("b0001000??????????????????1010011")
|
||||
def FDIV_S = BitPat("b0001100??????????????????1010011")
|
||||
def FSGNJ_S = BitPat("b0010000??????????000?????1010011")
|
||||
def FSGNJN_S = BitPat("b0010000??????????001?????1010011")
|
||||
def FSGNJX_S = BitPat("b0010000??????????010?????1010011")
|
||||
def FMIN_S = BitPat("b0010100??????????000?????1010011")
|
||||
def FMAX_S = BitPat("b0010100??????????001?????1010011")
|
||||
def FSQRT_S = BitPat("b010110000000?????????????1010011")
|
||||
def FADD_D = BitPat("b0000001??????????????????1010011")
|
||||
def FSUB_D = BitPat("b0000101??????????????????1010011")
|
||||
def FMUL_D = BitPat("b0001001??????????????????1010011")
|
||||
def FDIV_D = BitPat("b0001101??????????????????1010011")
|
||||
def FSGNJ_D = BitPat("b0010001??????????000?????1010011")
|
||||
def FSGNJN_D = BitPat("b0010001??????????001?????1010011")
|
||||
def FSGNJX_D = BitPat("b0010001??????????010?????1010011")
|
||||
def FMIN_D = BitPat("b0010101??????????000?????1010011")
|
||||
def FMAX_D = BitPat("b0010101??????????001?????1010011")
|
||||
def FCVT_S_D = BitPat("b010000000001?????????????1010011")
|
||||
def FCVT_D_S = BitPat("b010000100000?????????????1010011")
|
||||
def FSQRT_D = BitPat("b010110100000?????????????1010011")
|
||||
def FLE_S = BitPat("b1010000??????????000?????1010011")
|
||||
def FLT_S = BitPat("b1010000??????????001?????1010011")
|
||||
def FEQ_S = BitPat("b1010000??????????010?????1010011")
|
||||
def FLE_D = BitPat("b1010001??????????000?????1010011")
|
||||
def FLT_D = BitPat("b1010001??????????001?????1010011")
|
||||
def FEQ_D = BitPat("b1010001??????????010?????1010011")
|
||||
def FCVT_W_S = BitPat("b110000000000?????????????1010011")
|
||||
def FCVT_WU_S = BitPat("b110000000001?????????????1010011")
|
||||
def FCVT_L_S = BitPat("b110000000010?????????????1010011")
|
||||
def FCVT_LU_S = BitPat("b110000000011?????????????1010011")
|
||||
def FMV_X_S = BitPat("b111000000000?????000?????1010011")
|
||||
def FCLASS_S = BitPat("b111000000000?????001?????1010011")
|
||||
def FCVT_W_D = BitPat("b110000100000?????????????1010011")
|
||||
def FCVT_WU_D = BitPat("b110000100001?????????????1010011")
|
||||
def FCVT_L_D = BitPat("b110000100010?????????????1010011")
|
||||
def FCVT_LU_D = BitPat("b110000100011?????????????1010011")
|
||||
def FMV_X_D = BitPat("b111000100000?????000?????1010011")
|
||||
def FCLASS_D = BitPat("b111000100000?????001?????1010011")
|
||||
def FCVT_S_W = BitPat("b110100000000?????????????1010011")
|
||||
def FCVT_S_WU = BitPat("b110100000001?????????????1010011")
|
||||
def FCVT_S_L = BitPat("b110100000010?????????????1010011")
|
||||
def FCVT_S_LU = BitPat("b110100000011?????????????1010011")
|
||||
def FMV_S_X = BitPat("b111100000000?????000?????1010011")
|
||||
def FCVT_D_W = BitPat("b110100100000?????????????1010011")
|
||||
def FCVT_D_WU = BitPat("b110100100001?????????????1010011")
|
||||
def FCVT_D_L = BitPat("b110100100010?????????????1010011")
|
||||
def FCVT_D_LU = BitPat("b110100100011?????????????1010011")
|
||||
def FMV_D_X = BitPat("b111100100000?????000?????1010011")
|
||||
def FLW = BitPat("b?????????????????010?????0000111")
|
||||
def FLD = BitPat("b?????????????????011?????0000111")
|
||||
def FSW = BitPat("b?????????????????010?????0100111")
|
||||
def FSD = BitPat("b?????????????????011?????0100111")
|
||||
def FMADD_S = BitPat("b?????00??????????????????1000011")
|
||||
def FMSUB_S = BitPat("b?????00??????????????????1000111")
|
||||
def FNMSUB_S = BitPat("b?????00??????????????????1001011")
|
||||
def FNMADD_S = BitPat("b?????00??????????????????1001111")
|
||||
def FMADD_D = BitPat("b?????01??????????????????1000011")
|
||||
def FMSUB_D = BitPat("b?????01??????????????????1000111")
|
||||
def FNMSUB_D = BitPat("b?????01??????????????????1001011")
|
||||
def FNMADD_D = BitPat("b?????01??????????????????1001111")
|
||||
def CUSTOM0 = BitPat("b?????????????????000?????0001011")
|
||||
def CUSTOM0_RS1 = BitPat("b?????????????????010?????0001011")
|
||||
def CUSTOM0_RS1_RS2 = BitPat("b?????????????????011?????0001011")
|
||||
def CUSTOM0_RD = BitPat("b?????????????????100?????0001011")
|
||||
def CUSTOM0_RD_RS1 = BitPat("b?????????????????110?????0001011")
|
||||
def CUSTOM0_RD_RS1_RS2 = BitPat("b?????????????????111?????0001011")
|
||||
def CUSTOM1 = BitPat("b?????????????????000?????0101011")
|
||||
def CUSTOM1_RS1 = BitPat("b?????????????????010?????0101011")
|
||||
def CUSTOM1_RS1_RS2 = BitPat("b?????????????????011?????0101011")
|
||||
def CUSTOM1_RD = BitPat("b?????????????????100?????0101011")
|
||||
def CUSTOM1_RD_RS1 = BitPat("b?????????????????110?????0101011")
|
||||
def CUSTOM1_RD_RS1_RS2 = BitPat("b?????????????????111?????0101011")
|
||||
def CUSTOM2 = BitPat("b?????????????????000?????1011011")
|
||||
def CUSTOM2_RS1 = BitPat("b?????????????????010?????1011011")
|
||||
def CUSTOM2_RS1_RS2 = BitPat("b?????????????????011?????1011011")
|
||||
def CUSTOM2_RD = BitPat("b?????????????????100?????1011011")
|
||||
def CUSTOM2_RD_RS1 = BitPat("b?????????????????110?????1011011")
|
||||
def CUSTOM2_RD_RS1_RS2 = BitPat("b?????????????????111?????1011011")
|
||||
def CUSTOM3 = BitPat("b?????????????????000?????1111011")
|
||||
def CUSTOM3_RS1 = BitPat("b?????????????????010?????1111011")
|
||||
def CUSTOM3_RS1_RS2 = BitPat("b?????????????????011?????1111011")
|
||||
def CUSTOM3_RD = BitPat("b?????????????????100?????1111011")
|
||||
def CUSTOM3_RD_RS1 = BitPat("b?????????????????110?????1111011")
|
||||
def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011")
|
||||
def SLLI_RV32 = BitPat("b0000000??????????001?????0010011")
|
||||
def SRLI_RV32 = BitPat("b0000000??????????101?????0010011")
|
||||
def SRAI_RV32 = BitPat("b0100000??????????101?????0010011")
|
||||
def FRFLAGS = BitPat("b00000000000100000010?????1110011")
|
||||
def FSFLAGS = BitPat("b000000000001?????001?????1110011")
|
||||
def FSFLAGSI = BitPat("b000000000001?????101?????1110011")
|
||||
def FRRM = BitPat("b00000000001000000010?????1110011")
|
||||
def FSRM = BitPat("b000000000010?????001?????1110011")
|
||||
def FSRMI = BitPat("b000000000010?????101?????1110011")
|
||||
def FSCSR = BitPat("b000000000011?????001?????1110011")
|
||||
def FRCSR = BitPat("b00000000001100000010?????1110011")
|
||||
def RDCYCLE = BitPat("b11000000000000000010?????1110011")
|
||||
def RDTIME = BitPat("b11000000000100000010?????1110011")
|
||||
def RDINSTRET = BitPat("b11000000001000000010?????1110011")
|
||||
def RDCYCLEH = BitPat("b11001000000000000010?????1110011")
|
||||
def RDTIMEH = BitPat("b11001000000100000010?????1110011")
|
||||
def RDINSTRETH = BitPat("b11001000001000000010?????1110011")
|
||||
def SCALL = BitPat("b00000000000000000000000001110011")
|
||||
def SBREAK = BitPat("b00000000000100000000000001110011")
|
||||
}
|
||||
// Exception cause codes from the RISC-V privileged specification
// (values written to the mcause/scause CSRs on a trap).
object Causes {
  val misaligned_fetch = 0x0
  val fault_fetch = 0x1
  val illegal_instruction = 0x2
  val breakpoint = 0x3
  val misaligned_load = 0x4
  val fault_load = 0x5
  val misaligned_store = 0x6
  val fault_store = 0x7
  val user_ecall = 0x8
  val supervisor_ecall = 0x9
  val hypervisor_ecall = 0xa
  val machine_ecall = 0xb

  // Every defined cause code, for iteration/exhaustive tests.
  // (Built directly as an immutable Array literal rather than via a
  // mutable ArrayBuffer — same contents and element order as before.)
  val all: Array[Int] = Array(
    misaligned_fetch,
    fault_fetch,
    illegal_instruction,
    breakpoint,
    misaligned_load,
    fault_load,
    misaligned_store,
    fault_store,
    user_ecall,
    supervisor_ecall,
    hypervisor_ecall,
    machine_ecall)
}
|
||||
// CSR address map (12-bit CSR numbers) as used by this core generation,
// following the RISC-V privileged specification's address assignments.
object CSRs {
  // Floating-point user CSRs
  val fflags = 0x1
  val frm = 0x2
  val fcsr = 0x3
  // User counters
  val cycle = 0xc00
  val time = 0xc01
  val instret = 0xc02
  // Supervisor CSRs
  val sstatus = 0x100
  val sie = 0x104
  val stvec = 0x105
  val sscratch = 0x140
  val sepc = 0x141
  val scause = 0x142
  val sbadaddr = 0x143
  val sip = 0x144
  val sptbr = 0x180
  val scycle = 0xd00
  val stime = 0xd01
  val sinstret = 0xd02
  // Machine CSRs
  val mstatus = 0x300
  val medeleg = 0x302
  val mideleg = 0x303
  val mie = 0x304
  val mtvec = 0x305
  val mscratch = 0x340
  val mepc = 0x341
  val mcause = 0x342
  val mbadaddr = 0x343
  val mip = 0x344
  val mucounteren = 0x310
  val mscounteren = 0x311
  val mucycle_delta = 0x700
  val mutime_delta = 0x701
  val muinstret_delta = 0x702
  val mscycle_delta = 0x704
  val mstime_delta = 0x705
  val msinstret_delta = 0x706
  // Trigger/debug CSRs
  val tdrselect = 0x7a0
  val tdrdata1 = 0x7a1
  val tdrdata2 = 0x7a2
  val tdrdata3 = 0x7a3
  val dcsr = 0x7b0
  val dpc = 0x7b1
  val dscratch = 0x7b2
  // Machine counters and ID registers
  val mcycle = 0xf00
  val mtime = 0xf01
  val minstret = 0xf02
  val misa = 0xf10
  val mvendorid = 0xf11
  val marchid = 0xf12
  val mimpid = 0xf13
  val mhartid = 0xf14
  val mreset = 0x7c2
  // Upper halves of 64-bit counters (RV32 only)
  val cycleh = 0xc80
  val timeh = 0xc81
  val instreth = 0xc82
  val mucycle_deltah = 0x780
  val mutime_deltah = 0x781
  val muinstret_deltah = 0x782
  val mscycle_deltah = 0x784
  val mstime_deltah = 0x785
  val msinstret_deltah = 0x786
  val mcycleh = 0xf80
  val mtimeh = 0xf81
  val minstreth = 0xf82

  // All CSRs implemented on every XLEN.
  // (Built directly as an immutable Array literal rather than via a
  // mutable ArrayBuffer — same contents and element order as before.)
  val all: Array[Int] = Array(
    fflags, frm, fcsr,
    cycle, time, instret,
    sstatus, sie, stvec, sscratch, sepc, scause, sbadaddr, sip, sptbr,
    scycle, stime, sinstret,
    mstatus, medeleg, mideleg, mie, mtvec, mscratch, mepc, mcause, mbadaddr, mip,
    mucounteren, mscounteren,
    mucycle_delta, mutime_delta, muinstret_delta,
    mscycle_delta, mstime_delta, msinstret_delta,
    tdrselect, tdrdata1, tdrdata2, tdrdata3,
    dcsr, dpc, dscratch,
    mcycle, mtime, minstret,
    misa, mvendorid, marchid, mimpid, mhartid,
    mreset)

  // RV32 additionally exposes the *h upper-half counter CSRs.
  val all32: Array[Int] = all ++ Array(
    cycleh, timeh, instreth,
    mucycle_deltah, mutime_deltah, muinstret_deltah,
    mscycle_deltah, mstime_deltah, msinstret_deltah,
    mcycleh, mtimeh, minstreth)
}
|
||||
154
src/main/scala/rocket/multiplier.scala
Normal file
154
src/main/scala/rocket/multiplier.scala
Normal file
@@ -0,0 +1,154 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import ALU._
|
||||
import Util._
|
||||
|
||||
// Request to the multiply/divide unit.
class MultiplierReq(dataBits: Int, tagBits: Int) extends Bundle {
  val fn = Bits(width = SZ_ALU_FN)  // ALU function code selecting the mul/div/rem operation
  val dw = Bits(width = SZ_DW)      // operand-width selector (used for 32-bit ops on RV64)
  val in1 = Bits(width = dataBits)  // first operand
  val in2 = Bits(width = dataBits)  // second operand
  val tag = UInt(width = tagBits)   // request tag, echoed back in the response
  override def cloneType = new MultiplierReq(dataBits, tagBits).asInstanceOf[this.type]
}
|
||||
|
||||
// Response from the multiply/divide unit.
class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle {
  val data = Bits(width = dataBits) // operation result
  val tag = UInt(width = tagBits)   // tag of the request this result belongs to
  override def cloneType = new MultiplierResp(dataBits, tagBits).asInstanceOf[this.type]
}
|
||||
|
||||
// I/O bundle for the multiply/divide unit: decoupled request in,
// decoupled response out, plus a kill signal to abort an in-flight op.
class MultiplierIO(dataBits: Int, tagBits: Int) extends Bundle {
  val req = Decoupled(new MultiplierReq(dataBits, tagBits)).flip
  val kill = Bool(INPUT) // abort the current operation (see MulDiv: returns to s_ready)
  val resp = Decoupled(new MultiplierResp(dataBits, tagBits))
}
|
||||
|
||||
// Static configuration for the MulDiv unit.
case class MulDivConfig(
  mulUnroll: Int = 1,           // multiplier bits retired per cycle
  mulEarlyOut: Boolean = false, // allow multiplies to finish early when the remaining multiplier bits are zero
  divEarlyOut: Boolean = false  // allow divides to skip leading-zero iterations
)
|
||||
|
||||
// Iterative multiply/divide unit implementing MUL/MULH*/DIV(U)/REM(U).
// Multiplication retires cfg.mulUnroll bits of the multiplier per cycle;
// division is one bit per cycle.  Both support optional early-out.
// NOTE(review): this is order-sensitive Chisel (last connection wins);
// code is unchanged, only comments added.
class MulDiv(cfg: MulDivConfig, width: Int, nXpr: Int = 32) extends Module {
  val io = new MultiplierIO(width, log2Up(nXpr))
  val w = io.req.bits.in1.getWidth
  // Operand width rounded up to a whole number of unroll steps.
  val mulw = (w + cfg.mulUnroll - 1) / cfg.mulUnroll * cfg.mulUnroll

  val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6)
  val state = Reg(init=s_ready)

  val req = Reg(io.req.bits)
  val count = Reg(UInt(width = log2Up(w+1)))   // iteration counter
  val neg_out = Reg(Bool())                    // result must be negated at the end
  val isMul = Reg(Bool())                      // current op is a multiply
  val isHi = Reg(Bool())                       // result comes from the high half (MULH*/REM*)
  val divisor = Reg(Bits(width = w+1)) // div only needs w bits
  val remainder = Reg(Bits(width = 2*mulw+2)) // div only needs 2*w+1 bits

  // Decode the ALU function into: is-multiply, wants-high-half,
  // lhs-signed, rhs-signed.
  val cmdMul :: cmdHi :: lhsSigned :: rhsSigned :: Nil =
    DecodeLogic(io.req.bits.fn, List(X, X, X, X), List(
                   FN_DIV    -> List(N, N, Y, Y),
                   FN_REM    -> List(N, Y, Y, Y),
                   FN_DIVU   -> List(N, N, N, N),
                   FN_REMU   -> List(N, Y, N, N),
                   FN_MUL    -> List(Y, N, X, X),
                   FN_MULH   -> List(Y, Y, Y, Y),
                   FN_MULHU  -> List(Y, Y, N, N),
                   FN_MULHSU -> List(Y, Y, Y, N))).map(_ toBool)

  require(w == 32 || w == 64)
  // A 32-bit op on a 64-bit datapath (RV64 *W instructions).
  def halfWidth(req: MultiplierReq) = Bool(w > 32) && req.dw === DW_32

  // Sign- or zero-extend a possibly half-width operand to w bits,
  // returning the extended value and its effective sign bit.
  def sext(x: Bits, halfW: Bool, signed: Bool) = {
    val sign = signed && Mux(halfW, x(w/2-1), x(w-1))
    val hi = Mux(halfW, Fill(w/2, sign), x(w-1,w/2))
    (Cat(hi, x(w/2-1,0)), sign)
  }
  val (lhs_in, lhs_sign) = sext(io.req.bits.in1, halfWidth(io.req.bits), lhsSigned)
  val (rhs_in, rhs_sign) = sext(io.req.bits.in2, halfWidth(io.req.bits), rhsSigned)

  val subtractor = remainder(2*w,w) - divisor(w,0)
  val less = subtractor(w)                    // borrow bit: remainder-high < divisor
  val negated_remainder = -remainder(w-1,0)

  // Negate negative inputs so the iterative loop works on magnitudes.
  when (state === s_neg_inputs) {
    when (remainder(w-1) || isMul) {
      remainder := negated_remainder
    }
    when (divisor(w-1) || isMul) {
      // Here remainder(2*w,w) is zero, so subtractor == -divisor.
      divisor := subtractor
    }
    state := s_busy
  }

  when (state === s_neg_output) {
    remainder := negated_remainder
    state := s_done
  }
  // For high-half results, shift the remainder/high product into position.
  when (state === s_move_rem) {
    remainder := remainder(2*w, w+1)
    state := Mux(neg_out, s_neg_output, s_done)
  }
  // Multiply iteration: accumulate mulUnroll partial-product bits per cycle.
  when (state === s_busy && isMul) {
    val mulReg = Cat(remainder(2*mulw+1,w+1),remainder(w-1,0))
    val mplier = mulReg(mulw-1,0)
    val accum = mulReg(2*mulw,mulw).asSInt
    val mpcand = divisor.asSInt
    val prod = mplier(cfg.mulUnroll-1, 0) * mpcand + accum
    val nextMulReg = Cat(prod, mplier(mulw-1, cfg.mulUnroll))

    // Early out when all remaining multiplier bits are zero (low-half only).
    val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * cfg.mulUnroll)(log2Up(mulw)-1,0))(mulw-1,0)
    val eOut = Bool(cfg.mulEarlyOut) && count =/= mulw/cfg.mulUnroll-1 && count =/= 0 &&
      !isHi && (mplier & ~eOutMask) === UInt(0)
    val eOutRes = (mulReg >> (mulw - count * cfg.mulUnroll)(log2Up(mulw)-1,0))
    val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0))
    remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0))

    count := count + 1
    when (eOut || count === mulw/cfg.mulUnroll-1) {
      state := Mux(isHi, s_move_rem, s_done)
    }
  }
  // Divide iteration: restoring division, one quotient bit per cycle.
  when (state === s_busy && !isMul) {
    when (count === w) {
      state := Mux(isHi, s_move_rem, Mux(neg_out, s_neg_output, s_done))
    }
    count := count + 1

    remainder := Cat(Mux(less, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !less)

    // Early out: skip iterations that would only shift in zero quotient bits.
    val divisorMSB = Log2(divisor(w-1,0), w)
    val dividendMSB = Log2(remainder(w-1,0), w)
    val eOutPos = UInt(w-1) + divisorMSB - dividendMSB
    val eOutZero = divisorMSB > dividendMSB
    val eOut = count === 0 && less /* not divby0 */ && (eOutPos > 0 || eOutZero)
    when (Bool(cfg.divEarlyOut) && eOut) {
      val shift = Mux(eOutZero, UInt(w-1), eOutPos(log2Up(w)-1,0))
      remainder := remainder(w-1,0) << shift
      count := shift
    }
    // Division by zero: quotient is all-ones, so don't negate it.
    when (count === 0 && !less /* divby0 */ && !isHi) { neg_out := false }
  }
  when (io.resp.fire() || io.kill) {
    state := s_ready
  }
  // Accept a new request: latch decode results and operands.
  when (io.req.fire()) {
    state := Mux(lhs_sign || rhs_sign && !cmdMul, s_neg_inputs, s_busy)
    isMul := cmdMul
    isHi := cmdHi
    count := 0
    neg_out := !cmdMul && Mux(cmdHi, lhs_sign, lhs_sign =/= rhs_sign)
    divisor := Cat(rhs_sign, rhs_in)
    remainder := lhs_in
    req := io.req.bits
  }

  io.resp.bits := req
  // Half-width results are sign-extended from bit w/2-1.
  io.resp.bits.data := Mux(halfWidth(req), Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0))
  io.resp.valid := state === s_done
  io.req.ready := state === s_ready
}
|
||||
1243
src/main/scala/rocket/nbdcache.scala
Normal file
1243
src/main/scala/rocket/nbdcache.scala
Normal file
File diff suppressed because it is too large
Load Diff
4
src/main/scala/rocket/package.scala
Normal file
4
src/main/scala/rocket/package.scala
Normal file
@@ -0,0 +1,4 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
// Package object mixing ScalarOpConstants into the rocket package scope,
// making those constants visible package-wide without explicit imports.
package object rocket extends
  rocket.constants.ScalarOpConstants
|
||||
218
src/main/scala/rocket/ptw.scala
Normal file
218
src/main/scala/rocket/ptw.scala
Normal file
@@ -0,0 +1,218 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import uncore.agents._
|
||||
import uncore.constants._
|
||||
import Util._
|
||||
import uncore.util._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
// Page-table-walk request from a TLB: the virtual page number plus the
// privilege/permission context needed to check the final PTE (see PTE.access_ok).
class PTWReq(implicit p: Parameters) extends CoreBundle()(p) {
  val prv = Bits(width = 2)        // requester privilege level
  val pum = Bool()                 // status.PUM: deny S-mode access to U pages
  val mxr = Bool()                 // status.MXR: loads may read execute-only pages
  val addr = UInt(width = vpnBits) // virtual page number to translate
  val store = Bool()               // access is a store
  val fetch = Bool()               // access is an instruction fetch
}
|
||||
|
||||
// Page-table-walk response: the resolved (possibly invalid) PTE.
class PTWResp(implicit p: Parameters) extends CoreBundle()(p) {
  val pte = new PTE
}
|
||||
|
||||
// Port connecting a TLB to the PTW: request/response channel plus
// broadcast CSR state (page-table base, invalidate, status).
class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
  val req = Decoupled(new PTWReq)
  val resp = Valid(new PTWResp).flip
  val ptbr = new PTBR().asInput  // page-table base register
  val invalidate = Bool(INPUT)   // flush cached translations
  val status = new MStatus().asInput
}
|
||||
|
||||
// CSR state the datapath broadcasts to the PTW (no request channel).
class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
  val ptbr = new PTBR().asInput  // page-table base register
  val invalidate = Bool(INPUT)   // flush cached translations
  val status = new MStatus().asInput
}
|
||||
|
||||
// A page-table entry, fields declared MSB-to-LSB.
class PTE(implicit p: Parameters) extends CoreBundle()(p) {
  val reserved_for_hardware = Bits(width = 16)
  val ppn = UInt(width = 38)                 // physical page number
  val reserved_for_software = Bits(width = 2)
  val d = Bool()  // dirty
  val a = Bool()  // accessed
  val g = Bool()  // global
  val u = Bool()  // user-accessible
  val x = Bool()  // executable
  val w = Bool()  // writable
  val r = Bool()  // readable
  val v = Bool()  // valid

  // A valid non-leaf entry: points to the next page-table level.
  def table(dummy: Int = 0) = v && !r && !w && !x
  // A valid leaf entry (readable, or execute-only; write-only is not a leaf).
  def leaf(dummy: Int = 0) = v && (r || (x && !w))
  // User-mode read/write/execute permission.
  def ur(dummy: Int = 0) = sr() && u
  def uw(dummy: Int = 0) = sw() && u
  def ux(dummy: Int = 0) = sx() && u
  // Supervisor-mode read/write/execute permission.
  def sr(dummy: Int = 0) = leaf() && r
  def sw(dummy: Int = 0) = leaf() && w
  def sx(dummy: Int = 0) = leaf() && x

  // Whether this leaf PTE permits the given access: checks the requested
  // permission (fetch needs x; store needs w; load needs r, or x when MXR
  // is set) and the privilege rule (U pages need !PUM for S-mode; S pages
  // need req.prv(0), i.e. supervisor).
  def access_ok(req: PTWReq) = {
    val perm_ok = Mux(req.fetch, x, Mux(req.store, w, r || (x && req.mxr)))
    val priv_ok = Mux(u, !req.pum, req.prv(0))
    leaf() && priv_ok && perm_ok
  }
}
|
||||
|
||||
// Hardware page-table walker serving n TLB requestors through a
// round-robin arbiter.  Walks pgLevels levels via the D$ (io.mem),
// caches intermediate (non-leaf) PTEs in a small PLRU-managed cache,
// and sets A/D bits in memory with an atomic OR when required.
// NOTE(review): order-sensitive Chisel; code unchanged, comments only.
class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
  val io = new Bundle {
    val requestor = Vec(n, new TLBPTWIO).flip
    val mem = new HellaCacheIO
    val dpath = new DatapathPTWIO
  }

  // A/D-bit updates use M_XA_OR, so atomics must be available.
  require(usingAtomics, "PTW requires atomic memory operations")

  val s_ready :: s_req :: s_wait1 :: s_wait2 :: s_set_dirty :: s_wait1_dirty :: s_wait2_dirty :: s_done :: Nil = Enum(UInt(), 8)
  val state = Reg(init=s_ready)
  val count = Reg(UInt(width = log2Up(pgLevels)))  // current page-table level
  val s1_kill = Reg(next = Bool(false))            // kill last cycle's D$ request (PTE-cache hit)

  val r_req = Reg(new PTWReq)      // latched request being walked
  val r_req_dest = Reg(Bits())     // which requestor gets the response
  val r_pte = Reg(new PTE)         // current PTE / next-level pointer

  // VPN slice indexing the table at each level; level `count` is in use.
  val vpn_idxs = (0 until pgLevels).map(i => (r_req.addr >> (pgLevels-i-1)*pgLevelBits)(pgLevelBits-1,0))
  val vpn_idx = vpn_idxs(count)

  val arb = Module(new RRArbiter(new PTWReq, n))
  arb.io.in <> io.requestor.map(_.req)
  arb.io.out.ready := state === s_ready

  // PTE as read from memory; invalidate it if the PPN overflows ppnBits.
  val pte = {
    val tmp = new PTE().fromBits(io.mem.resp.bits.data)
    val res = Wire(init = new PTE().fromBits(io.mem.resp.bits.data))
    res.ppn := tmp.ppn(ppnBits-1, 0)
    when ((tmp.ppn >> ppnBits) =/= 0) { res.v := false }
    res
  }
  // Physical address of the PTE to fetch: table base + index, scaled by xLen/8.
  val pte_addr = Cat(r_pte.ppn, vpn_idx) << log2Ceil(xLen/8)

  when (arb.io.out.fire()) {
    r_req := arb.io.out.bits
    r_req_dest := arb.io.chosen
    r_pte.ppn := io.dpath.ptbr.ppn  // start the walk at the root table
  }

  // Small fully-associative cache of non-leaf PTEs, keyed by pte_addr,
  // with pseudo-LRU replacement.  Hits skip a memory access.
  val (pte_cache_hit, pte_cache_data) = {
    val size = 1 << log2Up(pgLevels * 2)
    val plru = new PseudoLRU(size)
    val valid = Reg(init = UInt(0, size))
    val tags = Reg(Vec(size, UInt(width = paddrBits)))
    val data = Reg(Vec(size, UInt(width = ppnBits)))

    val hits = tags.map(_ === pte_addr).asUInt & valid
    val hit = hits.orR
    // Fill on a non-leaf PTE response that missed the cache.
    when (io.mem.resp.valid && pte.table() && !hit) {
      val r = Mux(valid.andR, plru.replace, PriorityEncoder(~valid))
      valid := valid | UIntToOH(r)
      tags(r) := pte_addr
      data(r) := pte.ppn
    }
    when (hit && state === s_req) { plru.access(OHToUInt(hits)) }
    when (io.dpath.invalidate) { valid := 0 }

    // Only non-final levels can hit (leaves are not cached here).
    (hit && count < pgLevels-1, Mux1H(hits, data))
  }

  // Write-data for A/D-bit updates: OR in A, plus D on stores.
  val pte_wdata = Wire(init=new PTE().fromBits(0))
  pte_wdata.a := true
  pte_wdata.d := r_req.store

  io.mem.req.valid := state.isOneOf(s_req, s_set_dirty)
  io.mem.req.bits.phys := Bool(true)  // PTE addresses are physical
  io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD)
  io.mem.req.bits.typ := log2Ceil(xLen/8)
  io.mem.req.bits.addr := pte_addr
  io.mem.s1_data := pte_wdata.asUInt
  io.mem.s1_kill := s1_kill
  io.mem.invalidate_lr := Bool(false)

  // Response PPN per level: superpages splice untranslated VPN bits into
  // the leaf PPN; the last level uses the full translated PPN.
  val resp_ppns = (0 until pgLevels-1).map(i => Cat(pte_addr >> (pgIdxBits + pgLevelBits*(pgLevels-i-1)), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ (pte_addr >> pgIdxBits)
  for (i <- 0 until io.requestor.size) {
    io.requestor(i).resp.valid := state === s_done && (r_req_dest === i)
    io.requestor(i).resp.bits.pte := r_pte
    io.requestor(i).resp.bits.pte.ppn := resp_ppns(count)
    io.requestor(i).ptbr := io.dpath.ptbr
    io.requestor(i).invalidate := io.dpath.invalidate
    io.requestor(i).status := io.dpath.status
  }

  // control state machine
  switch (state) {
    is (s_ready) {
      when (arb.io.out.valid) {
        state := s_req
      }
      count := UInt(0)
    }
    is (s_req) {
      // PTE-cache hit: descend a level without a memory access, and kill
      // the D$ request that was issued this cycle.
      when (pte_cache_hit) {
        s1_kill := true
        state := s_req
        count := count + 1
        r_pte.ppn := pte_cache_data
      }.elsewhen (io.mem.req.ready) {
        state := s_wait1
      }
    }
    is (s_wait1) {
      state := s_wait2
      // Load access fault on the PTE fetch: respond with an invalid PTE.
      when (io.mem.xcpt.pf.ld) {
        r_pte.v := false
        state := s_done
      }
    }
    is (s_wait2) {
      when (io.mem.s2_nack) {
        state := s_req  // retry the PTE fetch
      }
      when (io.mem.resp.valid) {
        state := s_done
        // Leaf needing A (or D for a store) set: go update it atomically.
        when (pte.access_ok(r_req) && (!pte.a || (r_req.store && !pte.d))) {
          state := s_set_dirty
        }.otherwise {
          r_pte := pte
        }
        // Non-leaf and levels remain: descend.
        when (pte.table() && count < pgLevels-1) {
          state := s_req
          count := count + 1
        }
      }
    }
    is (s_set_dirty) {
      when (io.mem.req.ready) {
        state := s_wait1_dirty
      }
    }
    is (s_wait1_dirty) {
      state := s_wait2_dirty
      // Store access fault on the A/D update: respond with an invalid PTE.
      when (io.mem.xcpt.pf.st) {
        r_pte.v := false
        state := s_done
      }
    }
    is (s_wait2_dirty) {
      when (io.mem.s2_nack) {
        state := s_set_dirty  // retry the A/D update
      }
      when (io.mem.resp.valid) {
        state := s_req  // re-fetch the (now-updated) PTE
      }
    }
    is (s_done) {
      state := s_ready
    }
  }
}
|
||||
290
src/main/scala/rocket/rocc.scala
Normal file
290
src/main/scala/rocket/rocc.scala
Normal file
@@ -0,0 +1,290 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import uncore.agents.CacheName
|
||||
import Util._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
// cde configuration keys parameterizing RoCC accelerators:
// max in-flight tagged memory transactions, number of uncached TileLink
// channels, and number of PTW ports (names are indicative; values are
// consumed via p(...) elsewhere).
case object RoccMaxTaggedMemXacts extends Field[Int]
case object RoccNMemChannels extends Field[Int]
case object RoccNPTWPorts extends Field[Int]
|
||||
|
||||
// Fields of a RISC-V custom (RoCC) instruction, declared MSB-to-LSB.
class RoCCInstruction extends Bundle
{
  val funct = Bits(width = 7)  // accelerator-defined function code
  val rs2 = Bits(width = 5)
  val rs1 = Bits(width = 5)
  val xd = Bool()              // instruction writes rd
  val xs1 = Bool()             // instruction reads rs1
  val xs2 = Bool()             // instruction reads rs2
  val rd = Bits(width = 5)
  val opcode = Bits(width = 7)
}
|
||||
|
||||
// A command sent to a RoCC accelerator: the instruction plus the
// register operand values and the current machine status.
class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) {
  val inst = new RoCCInstruction
  val rs1 = Bits(width = xLen)  // value of source register 1
  val rs2 = Bits(width = xLen)  // value of source register 2
  val status = new MStatus
}
|
||||
|
||||
// A response from a RoCC accelerator: destination register and data.
class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) {
  val rd = Bits(width = 5)
  val data = Bits(width = xLen)
}
|
||||
|
||||
// Full interface between the core and a RoCC accelerator: command/response
// channels, an L1 D$ port, busy/interrupt signals, TileLink ports, PTW
// ports, an FPU port, and an exception input.
class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) {
  val cmd = Decoupled(new RoCCCommand).flip
  val resp = Decoupled(new RoCCResponse)
  // D$ port, configured with the L1D cache parameters.
  val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
  val busy = Bool(OUTPUT)      // accelerator has outstanding work
  val interrupt = Bool(OUTPUT) // raise an interrupt on the processor

  // These should be handled differently, eventually
  val autl = new ClientUncachedTileLinkIO  // uncached TileLink port
  val utl = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO)
  val ptw = Vec(p(RoccNPTWPorts), new TLBPTWIO)
  val fpu_req = Decoupled(new FPInput)
  val fpu_resp = Decoupled(new FPResult).flip
  val exception = Bool(INPUT)

  override def cloneType = new RoCCInterface().asInstanceOf[this.type]
}
|
||||
|
||||
// Base class for RoCC accelerators.  Memory requests use physical
// addresses by default; subclasses drive the remaining io signals.
abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) {
  val io = new RoCCInterface
  io.mem.req.bits.phys := Bool(true) // don't perform address translation
}
|
||||
|
||||
// Example RoCC accelerator: an n-entry accumulator register file.
// funct 0 = write, 1 = read, 2 = load from memory into an entry,
// 3 = accumulate (entry += rs1).  rs2 selects the entry; rs1 is the
// operand (or, for loads, the memory address).
class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) {
  val regfile = Mem(n, UInt(width = xLen))
  val busy = Reg(init = Vec.fill(n){Bool(false)})  // entry has a load in flight

  val cmd = Queue(io.cmd)
  val funct = cmd.bits.inst.funct
  val addr = cmd.bits.rs2(log2Up(n)-1,0)  // register-file index
  val doWrite = funct === UInt(0)
  val doRead = funct === UInt(1)
  val doLoad = funct === UInt(2)
  val doAccum = funct === UInt(3)
  val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0)

  // datapath
  val addend = cmd.bits.rs1
  val accum = regfile(addr)
  val wdata = Mux(doWrite, addend, accum + addend)

  when (cmd.fire() && (doWrite || doAccum)) {
    regfile(addr) := wdata
  }

  // A returning load fills its entry (tag identifies which one).
  when (io.mem.resp.valid) {
    regfile(memRespTag) := io.mem.resp.bits.data
    busy(memRespTag) := Bool(false)
  }

  // control
  when (io.mem.req.fire()) {
    busy(addr) := Bool(true)
  }

  val doResp = cmd.bits.inst.xd
  val stallReg = busy(addr)                    // entry still waiting on a load
  val stallLoad = doLoad && !io.mem.req.ready
  val stallResp = doResp && !io.resp.ready

  cmd.ready := !stallReg && !stallLoad && !stallResp
    // command resolved if no stalls AND not issuing a load that will need a request

  // PROC RESPONSE INTERFACE
  io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad
    // valid response if valid command, need a response, and no stalls
  io.resp.bits.rd := cmd.bits.inst.rd
    // Must respond with the appropriate tag or undefined behavior
  io.resp.bits.data := accum
    // Semantics is to always send out prior accumulator register value

  io.busy := cmd.valid || busy.reduce(_||_)
    // Be busy when have pending memory requests or committed possibility of pending requests
  io.interrupt := Bool(false)
    // Set this true to trigger an interrupt on the processor (please refer to supervisor documentation)

  // MEMORY REQUEST INTERFACE
  io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp
  io.mem.req.bits.addr := addend
  io.mem.req.bits.tag := addr
  io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
  io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
  io.mem.req.bits.data := Bits(0) // we're not performing any stores...
  io.mem.invalidate_lr := false

  // Unused TileLink port is tied off.
  io.autl.acquire.valid := false
  io.autl.grant.ready := false
}
|
||||
|
||||
// Example RoCC accelerator: translates the virtual address in rs1 to a
// physical address via PTW port 0, returning the physical address for a
// leaf PTE or all-ones on failure.
class TranslatorExample(implicit p: Parameters) extends RoCC()(p) {
  val req_addr = Reg(UInt(width = coreMaxAddrBits))  // virtual address being translated
  val req_rd = Reg(io.resp.bits.rd)                  // destination register for the response
  val req_offset = req_addr(pgIdxBits - 1, 0)        // page offset (untranslated)
  val req_vpn = req_addr(coreMaxAddrBits - 1, pgIdxBits)  // virtual page number
  val pte = Reg(new PTE)                             // PTE returned by the walker

  val s_idle :: s_ptw_req :: s_ptw_resp :: s_resp :: Nil = Enum(Bits(), 4)
  val state = Reg(init = s_idle)

  io.cmd.ready := (state === s_idle)

  // Accept a command: latch destination register and address.
  when (io.cmd.fire()) {
    req_rd := io.cmd.bits.inst.rd
    req_addr := io.cmd.bits.rs1
    state := s_ptw_req
  }

  private val ptw = io.ptw(0)

  when (ptw.req.fire()) { state := s_ptw_resp }

  when (state === s_ptw_resp && ptw.resp.valid) {
    pte := ptw.resp.bits.pte
    state := s_resp
  }

  when (io.resp.fire()) { state := s_idle }

  ptw.req.valid := (state === s_ptw_req)
  ptw.req.bits.addr := req_vpn
  ptw.req.bits.store := Bool(false)
  ptw.req.bits.fetch := Bool(false)

  io.resp.valid := (state === s_resp)
  io.resp.bits.rd := req_rd
  // Physical address = translated PPN ++ original page offset;
  // -1 (all ones) if the walk did not yield a leaf PTE.
  io.resp.bits.data := Mux(pte.leaf(), Cat(pte.ppn, req_offset), SInt(-1, xLen).asUInt)

  io.busy := (state =/= s_idle)
  io.interrupt := Bool(false)
  // Unused memory and TileLink ports are tied off.
  io.mem.req.valid := Bool(false)
  io.mem.invalidate_lr := Bool(false)
  io.autl.acquire.valid := Bool(false)
  io.autl.grant.ready := Bool(false)
}
|
||||
|
||||
// Example RoCC accelerator: counts occurrences of a "needle" byte in a
// NUL-terminated string in memory, fetching one cache block at a time over
// the uncached TileLink (autl) port.
// rs1 = base address of the string, rs2 = needle byte; the occurrence count
// is returned to register rd.
class CharacterCountExample(implicit p: Parameters) extends RoCC()(p)
  with HasTileLinkParameters {

  // Number of address bits covered by one cache block (beat + byte offsets).
  private val blockOffset = tlBeatAddrBits + tlByteAddrBits

  val needle = Reg(UInt(width = 8))              // byte value being counted
  val addr = Reg(UInt(width = coreMaxAddrBits))  // current search address
  val count = Reg(UInt(width = xLen))            // occurrences found so far
  val resp_rd = Reg(io.resp.bits.rd)             // destination register for the response

  val addr_block = addr(coreMaxAddrBits - 1, blockOffset)     // cache-block number
  val offset = addr(blockOffset - 1, 0)                       // byte offset within the first block
  val next_addr = (addr_block + UInt(1)) << UInt(blockOffset) // start address of the next block

  // idle -> acquire block -> receive grant beat -> scan beat -> respond
  val s_idle :: s_acq :: s_gnt :: s_check :: s_resp :: Nil = Enum(Bits(), 5)
  val state = Reg(init = s_idle)

  val gnt = io.autl.grant.bits
  val recv_data = Reg(UInt(width = tlDataBits))     // data of the beat being scanned
  val recv_beat = Reg(UInt(width = tlBeatAddrBits)) // which beat of the block it is

  // Split the received beat into bytes and compare each against NUL / needle.
  val data_bytes = Vec.tabulate(tlDataBytes) { i => recv_data(8 * (i + 1) - 1, 8 * i) }
  val zero_match = data_bytes.map(_ === UInt(0))
  val needle_match = data_bytes.map(_ === needle)
  val first_zero = PriorityEncoder(zero_match)      // index of first NUL byte in this beat

  // Count needle bytes in this beat that lie at or after the starting offset
  // and not after the first NUL terminator within the beat.
  val chars_found = PopCount(needle_match.zipWithIndex.map {
    case (matches, i) =>
      val idx = Cat(recv_beat, UInt(i, tlByteAddrBits))
      matches && idx >= offset && UInt(i) <= first_zero
  })
  val zero_found = zero_match.reduce(_ || _)
  val finished = Reg(Bool())                        // terminator seen in an earlier beat

  io.cmd.ready := (state === s_idle)
  io.resp.valid := (state === s_resp)
  io.resp.bits.rd := resp_rd
  io.resp.bits.data := count
  io.autl.acquire.valid := (state === s_acq)
  io.autl.acquire.bits := GetBlock(addr_block = addr_block)
  io.autl.grant.ready := (state === s_gnt)

  // Latch the command operands and start the first block fetch.
  when (io.cmd.fire()) {
    addr := io.cmd.bits.rs1
    needle := io.cmd.bits.rs2
    resp_rd := io.cmd.bits.inst.rd
    count := UInt(0)
    finished := Bool(false)
    state := s_acq
  }

  when (io.autl.acquire.fire()) { state := s_gnt }

  // Capture each arriving grant beat, then go scan it.
  when (io.autl.grant.fire()) {
    recv_beat := gnt.addr_beat
    recv_data := gnt.data
    state := s_check
  }

  when (state === s_check) {
    when (!finished) {
      count := count + chars_found
    }
    when (zero_found) { finished := Bool(true) }
    // After the last beat of a block: respond if the terminator was seen,
    // otherwise fetch the next block; mid-block, wait for the next beat.
    when (recv_beat === UInt(tlDataBeats - 1)) {
      addr := next_addr
      state := Mux(zero_found || finished, s_resp, s_acq)
    } .otherwise {
      state := s_gnt
    }
  }

  when (io.resp.fire()) { state := s_idle }

  io.busy := (state =/= s_idle)
  io.interrupt := Bool(false)
  // The coherent memory port is unused by this accelerator.
  io.mem.req.valid := Bool(false)
  io.mem.invalidate_lr := Bool(false)
}
|
||||
|
||||
/** An immutable set of RoCC custom-opcode encodings.
 *
 *  @param opcodes the opcode values (7-bit major opcodes) in this set
 */
class OpcodeSet(val opcodes: Seq[UInt]) {
  /** Union: a new set containing the opcodes of both operands. */
  def |(set: OpcodeSet) = new OpcodeSet(opcodes ++ set.opcodes)

  /** Hardware predicate: true when `oc` equals any opcode in the set. */
  def matches(oc: UInt) = opcodes.map(oc === _).reduce(_ || _)
}
|
||||
|
||||
/** The four RISC-V custom-opcode major encodings reserved for accelerators. */
object OpcodeSet {
  // Build a singleton set from a 7-bit opcode encoding string.
  private def single(enc: String) = new OpcodeSet(Seq(Bits(enc)))

  val custom0 = single("b0001011")
  val custom1 = single("b0101011")
  val custom2 = single("b1011011")
  val custom3 = single("b1111011")

  /** Every custom opcode, for an accelerator that claims all four. */
  val all = custom0 | custom1 | custom2 | custom3
}
|
||||
|
||||
// Routes incoming RoCC commands to one of several accelerators, selected by
// the custom-opcode set each accelerator claims.
class RoccCommandRouter(opcodes: Seq[OpcodeSet])(implicit p: Parameters)
    extends CoreModule()(p) {
  val io = new Bundle {
    val in = Decoupled(new RoCCCommand).flip                 // commands from the core
    val out = Vec(opcodes.size, Decoupled(new RoCCCommand))  // one port per accelerator
    val busy = Bool(OUTPUT)                                  // a command is buffered or waiting
  }

  // Buffer the incoming command stream.
  val cmd = Queue(io.in)
  // Fan the command out; only the port whose opcode set matches asserts valid.
  // Each element of cmdReadys is "this port matched AND accepted the command".
  val cmdReadys = io.out.zip(opcodes).map { case (out, opcode) =>
    val me = opcode.matches(cmd.bits.inst.opcode)
    out.valid := cmd.valid && me
    out.bits := cmd.bits
    out.ready && me
  }
  // Dequeue once the matching accelerator accepts the command.
  cmd.ready := cmdReadys.reduce(_ || _)
  io.busy := cmd.valid

  // Opcode sets must be disjoint: at most one output may match at a time.
  assert(PopCount(cmdReadys) <= UInt(1),
    "Custom opcode matched for more than one accelerator")
}
|
||||
702
src/main/scala/rocket/rocket.scala
Normal file
702
src/main/scala/rocket/rocket.scala
Normal file
@@ -0,0 +1,702 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import uncore.devices._
|
||||
import uncore.agents.CacheName
|
||||
import uncore.constants._
|
||||
import Util._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
// Core configuration fields, looked up through the cde Parameters mechanism.
case object XLen extends Field[Int]                       // base ISA register width (32 or 64)
case object FetchWidth extends Field[Int]                 // instructions fetched per cycle
case object RetireWidth extends Field[Int]                // instructions retired per cycle
case object FPUKey extends Field[Option[FPUConfig]]       // FPU configuration; None disables the FPU
case object MulDivKey extends Field[Option[MulDivConfig]] // mul/div unit config; None disables it
case object UseVM extends Field[Boolean]                  // enable virtual memory
case object UseUser extends Field[Boolean]                // enable user privilege mode
case object UseDebug extends Field[Boolean]               // enable debug mode
case object UseAtomics extends Field[Boolean]             // enable atomic memory operations
case object UseCompressed extends Field[Boolean]          // enable compressed (RVC) instructions
case object FastLoadWord extends Field[Boolean]           // enable early bypass of loaded words
case object FastLoadByte extends Field[Boolean]           // enable early bypass of sub-word loads
case object CoreInstBits extends Field[Int]               // bits per (uncompressed) instruction
case object NCustomMRWCSRs extends Field[Int]             // number of custom machine-mode R/W CSRs
case object MtvecWritable extends Field[Boolean]          // whether mtvec is writable at run time
case object MtvecInit extends Field[BigInt]               // reset value of mtvec
case object ResetVector extends Field[BigInt]             // PC value at reset
case object NBreakpoints extends Field[Int]               // number of hardware breakpoints
|
||||
|
||||
// Derived core parameters, computed from the configuration fields above.
// Mixed into core modules/bundles via CoreModule and CoreBundle.
trait HasCoreParameters extends HasAddrMapParameters {
  implicit val p: Parameters
  val xLen = p(XLen)

  // Feature enables.
  val usingVM = p(UseVM)
  val usingUser = p(UseUser) || usingVM  // virtual memory implies user mode
  val usingDebug = p(UseDebug)
  val usingMulDiv = p(MulDivKey).nonEmpty
  val usingFPU = p(FPUKey).nonEmpty
  val usingAtomics = p(UseAtomics)
  val usingCompressed = p(UseCompressed)
  val usingRoCC = !p(BuildRoCC).isEmpty
  val fastLoadWord = p(FastLoadWord)
  val fastLoadByte = p(FastLoadByte)

  // Pipeline widths and data sizes.
  val retireWidth = p(RetireWidth)
  val fetchWidth = p(FetchWidth)
  val coreInstBits = p(CoreInstBits)
  val coreInstBytes = coreInstBits/8
  val coreDataBits = xLen
  val coreDataBytes = coreDataBits/8
  // D$ arbiter ports: the core, plus the PTW when VM is on, plus one per RoCC accelerator.
  val dcacheArbPorts = 1 + (if (usingVM) 1 else 0) + p(BuildRoCC).size
  val coreDCacheReqTagBits = 6
  val dcacheReqTagBits = coreDCacheReqTagBits + log2Ceil(dcacheArbPorts)

  // Virtual-memory geometry.
  def pgIdxBits = 12                           // 4 KiB pages
  def pgLevelBits = 10 - log2Ceil(xLen / 32)   // 10 VPN bits per level on RV32, 9 on RV64
  def vaddrBits = pgIdxBits + pgLevels * pgLevelBits
  def ppnBits = paddrBits - pgIdxBits
  def vpnBits = vaddrBits - pgIdxBits
  val pgLevels = p(PgLevels)
  val asIdBits = p(ASIdBits)
  // One extra VPN bit when virtual addresses are narrower than xLen —
  // NOTE(review): presumably to distinguish sign-extended (canonical) from
  // non-canonical addresses; confirm against the TLB/PTW users.
  val vpnBitsExtended = vpnBits + (vaddrBits < xLen).toInt
  val vaddrBitsExtended = vpnBitsExtended + pgIdxBits
  val coreMaxAddrBits = paddrBits max vaddrBitsExtended
  val nCustomMrwCsrs = p(NCustomMRWCSRs)
  val nCores = p(NTiles)

  // fetchWidth doubled, but coreInstBytes halved, for RVC
  val decodeWidth = fetchWidth / (if (usingCompressed) 2 else 1)

  // Print out log of committed instructions and their writeback values.
  // Requires post-processing due to out-of-order writebacks.
  val enableCommitLog = false

  // Widest physical address supported for each xLen.
  val maxPAddrBits = xLen match {
    case 32 => 34
    case 64 => 50
  }

  require(paddrBits <= maxPAddrBits)
  // Byte-level load bypass builds on word-level bypass, so it cannot be
  // enabled alone.
  require(!fastLoadByte || fastLoadWord)
}
|
||||
|
||||
// Base class for core modules; mixes in the derived core parameters.
abstract class CoreModule(implicit val p: Parameters) extends Module
  with HasCoreParameters
// Base class for core bundles; mixes in the derived core parameters.
abstract class CoreBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasCoreParameters
|
||||
|
||||
// Integer register file with `n` entries of width `w`, built on a Chisel Mem.
// If `zero` is true, reads of address 0 return 0 regardless of memory contents.
// Same-cycle writes are bypassed to reads of the same address, implemented by
// recording every read wire and patching it inside write().
class RegFile(n: Int, w: Int, zero: Boolean = false) {
  private val rf = Mem(n, UInt(width = w))
  // Addresses are complemented before indexing — NOTE(review): presumably so
  // that with n = 31 entries, registers x1..x31 map to entries 30..0 and x0
  // needs no storage; confirm against call sites.
  private def access(addr: UInt) = rf(~addr(log2Up(n)-1,0))
  private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]()
  private var canRead = true
  // Read a register. All reads must be issued before the first write()
  // (enforced by canRead) so the write-bypass below can patch every read.
  def read(addr: UInt) = {
    require(canRead)
    reads += addr -> Wire(UInt())
    reads.last._2 := Mux(Bool(zero) && addr === UInt(0), UInt(0), access(addr))
    reads.last._2
  }
  // Write a register. Writes to address 0 are ignored; a write also overrides
  // any previously-issued read of the same address (write-first bypass).
  def write(addr: UInt, data: UInt) = {
    canRead = false
    when (addr =/= UInt(0)) {
      access(addr) := data
      for ((raddr, rdata) <- reads)
        when (addr === raddr) { rdata := data }
    }
  }
}
|
||||
|
||||
// Immediate generator: extracts and assembles the immediate field of a RISC-V
// instruction. `sel` chooses the format (I, S, SB, U, UJ, or Z for CSR zimm);
// the result is the immediate as a sign-extended SInt.
// Each val below computes one bit-slice of the immediate, muxed by format,
// following the RISC-V base-ISA immediate encodings.
object ImmGen {
  def apply(sel: UInt, inst: UInt) = {
    // Z (CSR) immediates are zero-extended; all others sign-extend from inst[31].
    val sign = Mux(sel === IMM_Z, SInt(0), inst(31).asSInt)
    // Bits [30:20] are literal only for U-type; otherwise sign-fill.
    val b30_20 = Mux(sel === IMM_U, inst(30,20).asSInt, sign)
    // Bits [19:12] are literal for U- and UJ-type; otherwise sign-fill.
    val b19_12 = Mux(sel =/= IMM_U && sel =/= IMM_UJ, sign, inst(19,12).asSInt)
    // Bit 11 comes from inst[20] (UJ), inst[7] (SB), zero (U/Z), else sign.
    val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0),
              Mux(sel === IMM_UJ, inst(20).asSInt,
              Mux(sel === IMM_SB, inst(7).asSInt, sign)))
    // Bits [10:5] always come from inst[30:25], except zero for U/Z.
    val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25))
    // Bits [4:1]: inst[11:8] for S/SB, inst[19:16] for Z, inst[24:21] otherwise.
    val b4_1 = Mux(sel === IMM_U, Bits(0),
               Mux(sel === IMM_S || sel === IMM_SB, inst(11,8),
               Mux(sel === IMM_Z, inst(19,16), inst(24,21))))
    // Bit 0: inst[7] for S, inst[20] for I, inst[15] for Z, zero for branch/jump/U.
    val b0 = Mux(sel === IMM_S, inst(7),
             Mux(sel === IMM_I, inst(20),
             Mux(sel === IMM_Z, inst(15), Bits(0))))

    Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).asSInt
  }
}
|
||||
|
||||
class Rocket(implicit p: Parameters) extends CoreModule()(p) {
|
||||
val io = new Bundle {
|
||||
val prci = new PRCITileIO().flip
|
||||
val imem = new FrontendIO()(p.alterPartial({case CacheName => "L1I" }))
|
||||
val dmem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
|
||||
val ptw = new DatapathPTWIO().flip
|
||||
val fpu = new FPUIO().flip
|
||||
val rocc = new RoCCInterface().flip
|
||||
}
|
||||
|
||||
val decode_table = {
|
||||
(if (usingMulDiv) new MDecode +: (if (xLen > 32) Seq(new M64Decode) else Nil) else Nil) ++:
|
||||
(if (usingAtomics) new ADecode +: (if (xLen > 32) Seq(new A64Decode) else Nil) else Nil) ++:
|
||||
(if (usingFPU) new FDecode +: (if (xLen > 32) Seq(new F64Decode) else Nil) else Nil) ++:
|
||||
(if (usingRoCC) Some(new RoCCDecode) else None) ++:
|
||||
(if (xLen > 32) Some(new I64Decode) else None) ++:
|
||||
(if (usingVM) Some(new SDecode) else None) ++:
|
||||
(if (usingDebug) Some(new DebugDecode) else None) ++:
|
||||
Seq(new IDecode)
|
||||
} flatMap(_.table)
|
||||
|
||||
val ex_ctrl = Reg(new IntCtrlSigs)
|
||||
val mem_ctrl = Reg(new IntCtrlSigs)
|
||||
val wb_ctrl = Reg(new IntCtrlSigs)
|
||||
|
||||
val ex_reg_xcpt_interrupt = Reg(Bool())
|
||||
val ex_reg_valid = Reg(Bool())
|
||||
val ex_reg_rvc = Reg(Bool())
|
||||
val ex_reg_btb_hit = Reg(Bool())
|
||||
val ex_reg_btb_resp = Reg(new BTBResp)
|
||||
val ex_reg_xcpt = Reg(Bool())
|
||||
val ex_reg_flush_pipe = Reg(Bool())
|
||||
val ex_reg_load_use = Reg(Bool())
|
||||
val ex_reg_cause = Reg(UInt())
|
||||
val ex_reg_replay = Reg(Bool())
|
||||
val ex_reg_pc = Reg(UInt())
|
||||
val ex_reg_inst = Reg(Bits())
|
||||
|
||||
val mem_reg_xcpt_interrupt = Reg(Bool())
|
||||
val mem_reg_valid = Reg(Bool())
|
||||
val mem_reg_rvc = Reg(Bool())
|
||||
val mem_reg_btb_hit = Reg(Bool())
|
||||
val mem_reg_btb_resp = Reg(new BTBResp)
|
||||
val mem_reg_xcpt = Reg(Bool())
|
||||
val mem_reg_replay = Reg(Bool())
|
||||
val mem_reg_flush_pipe = Reg(Bool())
|
||||
val mem_reg_cause = Reg(UInt())
|
||||
val mem_reg_slow_bypass = Reg(Bool())
|
||||
val mem_reg_load = Reg(Bool())
|
||||
val mem_reg_store = Reg(Bool())
|
||||
val mem_reg_pc = Reg(UInt())
|
||||
val mem_reg_inst = Reg(Bits())
|
||||
val mem_reg_wdata = Reg(Bits())
|
||||
val mem_reg_rs2 = Reg(Bits())
|
||||
val take_pc_mem = Wire(Bool())
|
||||
|
||||
val wb_reg_valid = Reg(Bool())
|
||||
val wb_reg_xcpt = Reg(Bool())
|
||||
val wb_reg_replay = Reg(Bool())
|
||||
val wb_reg_cause = Reg(UInt())
|
||||
val wb_reg_pc = Reg(UInt())
|
||||
val wb_reg_inst = Reg(Bits())
|
||||
val wb_reg_wdata = Reg(Bits())
|
||||
val wb_reg_rs2 = Reg(Bits())
|
||||
val take_pc_wb = Wire(Bool())
|
||||
|
||||
val take_pc_mem_wb = take_pc_wb || take_pc_mem
|
||||
val take_pc = take_pc_mem_wb
|
||||
|
||||
// decode stage
|
||||
val ibuf = Module(new IBuf)
|
||||
val id_expanded_inst = ibuf.io.inst.map(_.bits.inst)
|
||||
val id_inst = id_expanded_inst.map(_.bits)
|
||||
ibuf.io.imem <> io.imem.resp
|
||||
ibuf.io.kill := take_pc
|
||||
|
||||
require(decodeWidth == 1 /* TODO */ && retireWidth == decodeWidth)
|
||||
val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst(0), decode_table)
|
||||
val id_raddr3 = id_expanded_inst(0).rs3
|
||||
val id_raddr2 = id_expanded_inst(0).rs2
|
||||
val id_raddr1 = id_expanded_inst(0).rs1
|
||||
val id_waddr = id_expanded_inst(0).rd
|
||||
val id_load_use = Wire(Bool())
|
||||
val id_reg_fence = Reg(init=Bool(false))
|
||||
val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2)
|
||||
val id_raddr = IndexedSeq(id_raddr1, id_raddr2)
|
||||
val rf = new RegFile(31, xLen)
|
||||
val id_rs = id_raddr.map(rf.read _)
|
||||
val ctrl_killd = Wire(Bool())
|
||||
|
||||
val csr = Module(new CSRFile)
|
||||
val id_csr_en = id_ctrl.csr =/= CSR.N
|
||||
val id_system_insn = id_ctrl.csr === CSR.I
|
||||
val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0)
|
||||
val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
|
||||
val id_csr_addr = id_inst(0)(31,20)
|
||||
// this is overly conservative
|
||||
val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil
|
||||
val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*)
|
||||
val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs.map(UInt(_)), (legal_csrs -- safe_csrs).toList.map(UInt(_))))
|
||||
|
||||
val id_illegal_insn = !id_ctrl.legal ||
|
||||
id_ctrl.fp && !csr.io.status.fs.orR ||
|
||||
id_ctrl.rocc && !csr.io.status.xs.orR
|
||||
// stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
|
||||
val id_amo_aq = id_inst(0)(26)
|
||||
val id_amo_rl = id_inst(0)(25)
|
||||
val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
|
||||
val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
|
||||
val id_rocc_busy = Bool(usingRoCC) &&
|
||||
(io.rocc.busy || ex_reg_valid && ex_ctrl.rocc ||
|
||||
mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc)
|
||||
id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy
|
||||
val id_do_fence = id_rocc_busy && id_ctrl.fence ||
|
||||
id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en)
|
||||
|
||||
val bpu = Module(new BreakpointUnit)
|
||||
bpu.io.status := csr.io.status
|
||||
bpu.io.bp := csr.io.bp
|
||||
bpu.io.pc := ibuf.io.pc
|
||||
bpu.io.ea := mem_reg_wdata
|
||||
|
||||
val id_xcpt_if = ibuf.io.inst(0).bits.pf0 || ibuf.io.inst(0).bits.pf1
|
||||
val (id_xcpt, id_cause) = checkExceptions(List(
|
||||
(csr.io.interrupt, csr.io.interrupt_cause),
|
||||
(bpu.io.xcpt_if, UInt(Causes.breakpoint)),
|
||||
(id_xcpt_if, UInt(Causes.fault_fetch)),
|
||||
(id_illegal_insn, UInt(Causes.illegal_instruction))))
|
||||
|
||||
val dcache_bypass_data =
|
||||
if (fastLoadByte) io.dmem.resp.bits.data
|
||||
else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass
|
||||
else wb_reg_wdata
|
||||
|
||||
// detect bypass opportunities
|
||||
val ex_waddr = ex_reg_inst(11,7)
|
||||
val mem_waddr = mem_reg_inst(11,7)
|
||||
val wb_waddr = wb_reg_inst(11,7)
|
||||
val bypass_sources = IndexedSeq(
|
||||
(Bool(true), UInt(0), UInt(0)), // treat reading x0 as a bypass
|
||||
(ex_reg_valid && ex_ctrl.wxd, ex_waddr, mem_reg_wdata),
|
||||
(mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, mem_waddr, wb_reg_wdata),
|
||||
(mem_reg_valid && mem_ctrl.wxd, mem_waddr, dcache_bypass_data))
|
||||
val id_bypass_src = id_raddr.map(raddr => bypass_sources.map(s => s._1 && s._2 === raddr))
|
||||
|
||||
// execute stage
|
||||
val bypass_mux = Vec(bypass_sources.map(_._3))
|
||||
val ex_reg_rs_bypass = Reg(Vec(id_raddr.size, Bool()))
|
||||
val ex_reg_rs_lsb = Reg(Vec(id_raddr.size, UInt()))
|
||||
val ex_reg_rs_msb = Reg(Vec(id_raddr.size, UInt()))
|
||||
val ex_rs = for (i <- 0 until id_raddr.size)
|
||||
yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
|
||||
val ex_imm = ImmGen(ex_ctrl.sel_imm, ex_reg_inst)
|
||||
val ex_op1 = MuxLookup(ex_ctrl.sel_alu1, SInt(0), Seq(
|
||||
A1_RS1 -> ex_rs(0).asSInt,
|
||||
A1_PC -> ex_reg_pc.asSInt))
|
||||
val ex_op2 = MuxLookup(ex_ctrl.sel_alu2, SInt(0), Seq(
|
||||
A2_RS2 -> ex_rs(1).asSInt,
|
||||
A2_IMM -> ex_imm,
|
||||
A2_SIZE -> Mux(ex_reg_rvc, SInt(2), SInt(4))))
|
||||
|
||||
val alu = Module(new ALU)
|
||||
alu.io.dw := ex_ctrl.alu_dw
|
||||
alu.io.fn := ex_ctrl.alu_fn
|
||||
alu.io.in2 := ex_op2.asUInt
|
||||
alu.io.in1 := ex_op1.asUInt
|
||||
|
||||
// multiplier and divider
|
||||
val div = Module(new MulDiv(p(MulDivKey).getOrElse(MulDivConfig()), width = xLen))
|
||||
div.io.req.valid := ex_reg_valid && ex_ctrl.div
|
||||
div.io.req.bits.dw := ex_ctrl.alu_dw
|
||||
div.io.req.bits.fn := ex_ctrl.alu_fn
|
||||
div.io.req.bits.in1 := ex_rs(0)
|
||||
div.io.req.bits.in2 := ex_rs(1)
|
||||
div.io.req.bits.tag := ex_waddr
|
||||
|
||||
ex_reg_valid := !ctrl_killd
|
||||
ex_reg_replay := !take_pc && ibuf.io.inst(0).valid && ibuf.io.inst(0).bits.replay
|
||||
ex_reg_xcpt := !ctrl_killd && id_xcpt
|
||||
ex_reg_xcpt_interrupt := !take_pc && ibuf.io.inst(0).valid && csr.io.interrupt
|
||||
when (id_xcpt) { ex_reg_cause := id_cause }
|
||||
ex_reg_btb_hit := ibuf.io.inst(0).bits.btb_hit
|
||||
when (ibuf.io.inst(0).bits.btb_hit) { ex_reg_btb_resp := ibuf.io.btb_resp }
|
||||
|
||||
when (!ctrl_killd) {
|
||||
ex_ctrl := id_ctrl
|
||||
ex_reg_rvc := ibuf.io.inst(0).bits.rvc
|
||||
ex_ctrl.csr := id_csr
|
||||
when (id_xcpt) { // pass PC down ALU writeback pipeline for badaddr
|
||||
ex_ctrl.alu_fn := ALU.FN_ADD
|
||||
ex_ctrl.alu_dw := DW_XPR
|
||||
ex_ctrl.sel_alu1 := A1_PC
|
||||
ex_ctrl.sel_alu2 := A2_ZERO
|
||||
when (!bpu.io.xcpt_if && !ibuf.io.inst(0).bits.pf0 && ibuf.io.inst(0).bits.pf1) { // PC+2
|
||||
ex_ctrl.sel_alu2 := A2_SIZE
|
||||
ex_reg_rvc := true
|
||||
}
|
||||
}
|
||||
ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep
|
||||
ex_reg_load_use := id_load_use
|
||||
|
||||
when (id_ctrl.jalr && csr.io.status.debug) {
|
||||
ex_reg_flush_pipe := true
|
||||
ex_ctrl.fence_i := true
|
||||
}
|
||||
|
||||
for (i <- 0 until id_raddr.size) {
|
||||
val do_bypass = id_bypass_src(i).reduce(_||_)
|
||||
val bypass_src = PriorityEncoder(id_bypass_src(i))
|
||||
ex_reg_rs_bypass(i) := do_bypass
|
||||
ex_reg_rs_lsb(i) := bypass_src
|
||||
when (id_ren(i) && !do_bypass) {
|
||||
ex_reg_rs_lsb(i) := id_rs(i)(bypass_src.getWidth-1,0)
|
||||
ex_reg_rs_msb(i) := id_rs(i) >> bypass_src.getWidth
|
||||
}
|
||||
}
|
||||
}
|
||||
when (!ctrl_killd || csr.io.interrupt || ibuf.io.inst(0).bits.replay) {
|
||||
ex_reg_inst := id_inst(0)
|
||||
ex_reg_pc := ibuf.io.pc
|
||||
}
|
||||
|
||||
// replay inst in ex stage?
|
||||
val ex_pc_valid = ex_reg_valid || ex_reg_replay || ex_reg_xcpt_interrupt
|
||||
val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid
|
||||
val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready ||
|
||||
ex_ctrl.div && !div.io.req.ready
|
||||
val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use
|
||||
val replay_ex = ex_reg_replay || (ex_reg_valid && (replay_ex_structural || replay_ex_load_use))
|
||||
val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid
|
||||
// detect 2-cycle load-use delay for LB/LH/SC
|
||||
val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type)
|
||||
|
||||
val (ex_xcpt, ex_cause) = checkExceptions(List(
|
||||
(ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause),
|
||||
(ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction))))
|
||||
|
||||
// memory stage
|
||||
val mem_br_taken = mem_reg_wdata(0)
|
||||
val mem_br_target = mem_reg_pc.asSInt +
|
||||
Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
|
||||
Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst),
|
||||
Mux(mem_reg_rvc, SInt(2), SInt(4))))
|
||||
val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).asSInt, mem_br_target) & SInt(-2)).asUInt
|
||||
val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(ibuf.io.inst(0).valid, mem_npc =/= ibuf.io.pc, Bool(true)))
|
||||
val mem_npc_misaligned = if (usingCompressed) Bool(false) else mem_npc(1)
|
||||
val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.asSInt).asUInt
|
||||
val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
|
||||
val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal
|
||||
val mem_misprediction =
|
||||
if (p(BtbKey).nEntries == 0) mem_cfi_taken
|
||||
else mem_wrong_npc
|
||||
take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
|
||||
|
||||
mem_reg_valid := !ctrl_killx
|
||||
mem_reg_replay := !take_pc_mem_wb && replay_ex
|
||||
mem_reg_xcpt := !ctrl_killx && ex_xcpt
|
||||
mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt
|
||||
when (ex_xcpt) { mem_reg_cause := ex_cause }
|
||||
|
||||
when (ex_pc_valid) {
|
||||
mem_ctrl := ex_ctrl
|
||||
mem_reg_rvc := ex_reg_rvc
|
||||
mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)
|
||||
mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
|
||||
mem_reg_btb_hit := ex_reg_btb_hit
|
||||
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp }
|
||||
mem_reg_flush_pipe := ex_reg_flush_pipe
|
||||
mem_reg_slow_bypass := ex_slow_bypass
|
||||
|
||||
mem_reg_inst := ex_reg_inst
|
||||
mem_reg_pc := ex_reg_pc
|
||||
mem_reg_wdata := alu.io.out
|
||||
when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc)) {
|
||||
mem_reg_rs2 := ex_rs(1)
|
||||
}
|
||||
}
|
||||
|
||||
val mem_breakpoint = (mem_reg_load && bpu.io.xcpt_ld) || (mem_reg_store && bpu.io.xcpt_st)
|
||||
val (mem_new_xcpt, mem_new_cause) = checkExceptions(List(
|
||||
(mem_breakpoint, UInt(Causes.breakpoint)),
|
||||
(mem_npc_misaligned, UInt(Causes.misaligned_fetch)),
|
||||
(mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)),
|
||||
(mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)),
|
||||
(mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)),
|
||||
(mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load))))
|
||||
|
||||
val (mem_xcpt, mem_cause) = checkExceptions(List(
|
||||
(mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
|
||||
(mem_reg_valid && mem_new_xcpt, mem_new_cause)))
|
||||
|
||||
val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port
|
||||
val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem
|
||||
val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem
|
||||
val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
|
||||
div.io.kill := killm_common && Reg(next = div.io.req.fire())
|
||||
val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem
|
||||
|
||||
// writeback stage
|
||||
wb_reg_valid := !ctrl_killm
|
||||
wb_reg_replay := replay_mem && !take_pc_wb
|
||||
wb_reg_xcpt := mem_xcpt && !take_pc_wb
|
||||
when (mem_xcpt) { wb_reg_cause := mem_cause }
|
||||
when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) {
|
||||
wb_ctrl := mem_ctrl
|
||||
wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
|
||||
when (mem_ctrl.rocc) {
|
||||
wb_reg_rs2 := mem_reg_rs2
|
||||
}
|
||||
wb_reg_inst := mem_reg_inst
|
||||
wb_reg_pc := mem_reg_pc
|
||||
}
|
||||
|
||||
val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc
|
||||
val replay_wb_common = io.dmem.s2_nack || wb_reg_replay
|
||||
val replay_wb_rocc = wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready
|
||||
val replay_wb = replay_wb_common || replay_wb_rocc
|
||||
val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt
|
||||
take_pc_wb := replay_wb || wb_xcpt || csr.io.eret
|
||||
|
||||
// writeback arbitration
|
||||
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
|
||||
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
|
||||
val dmem_resp_waddr = io.dmem.resp.bits.tag(5, 1)
|
||||
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
|
||||
val dmem_resp_replay = dmem_resp_valid && io.dmem.resp.bits.replay
|
||||
|
||||
div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
|
||||
val ll_wdata = Wire(init = div.io.resp.bits.data)
|
||||
val ll_waddr = Wire(init = div.io.resp.bits.tag)
|
||||
val ll_wen = Wire(init = div.io.resp.fire())
|
||||
if (usingRoCC) {
|
||||
io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
|
||||
when (io.rocc.resp.fire()) {
|
||||
div.io.resp.ready := Bool(false)
|
||||
ll_wdata := io.rocc.resp.bits.data
|
||||
ll_waddr := io.rocc.resp.bits.rd
|
||||
ll_wen := Bool(true)
|
||||
}
|
||||
}
|
||||
when (dmem_resp_replay && dmem_resp_xpu) {
|
||||
div.io.resp.ready := Bool(false)
|
||||
if (usingRoCC)
|
||||
io.rocc.resp.ready := Bool(false)
|
||||
ll_waddr := dmem_resp_waddr
|
||||
ll_wen := Bool(true)
|
||||
}
|
||||
|
||||
val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt
|
||||
val wb_wen = wb_valid && wb_ctrl.wxd
|
||||
val rf_wen = wb_wen || ll_wen
|
||||
val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
|
||||
val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data,
|
||||
Mux(ll_wen, ll_wdata,
|
||||
Mux(wb_ctrl.csr =/= CSR.N, csr.io.rw.rdata,
|
||||
wb_reg_wdata)))
|
||||
when (rf_wen) { rf.write(rf_waddr, rf_wdata) }
|
||||
|
||||
// hook up control/status regfile
|
||||
csr.io.exception := wb_reg_xcpt
|
||||
csr.io.cause := wb_reg_cause
|
||||
csr.io.retire := wb_valid
|
||||
csr.io.prci <> io.prci
|
||||
io.fpu.fcsr_rm := csr.io.fcsr_rm
|
||||
csr.io.fcsr_flags := io.fpu.fcsr_flags
|
||||
csr.io.rocc.interrupt <> io.rocc.interrupt
|
||||
csr.io.pc := wb_reg_pc
|
||||
csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata)
|
||||
io.ptw.ptbr := csr.io.ptbr
|
||||
io.ptw.invalidate := csr.io.fatc
|
||||
io.ptw.status := csr.io.status
|
||||
csr.io.rw.addr := wb_reg_inst(31,20)
|
||||
csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N)
|
||||
csr.io.rw.wdata := wb_reg_wdata
|
||||
|
||||
val hazard_targets = Seq((id_ctrl.rxs1 && id_raddr1 =/= UInt(0), id_raddr1),
|
||||
(id_ctrl.rxs2 && id_raddr2 =/= UInt(0), id_raddr2),
|
||||
(id_ctrl.wxd && id_waddr =/= UInt(0), id_waddr))
|
||||
val fp_hazard_targets = Seq((io.fpu.dec.ren1, id_raddr1),
|
||||
(io.fpu.dec.ren2, id_raddr2),
|
||||
(io.fpu.dec.ren3, id_raddr3),
|
||||
(io.fpu.dec.wen, id_waddr))
|
||||
|
||||
val sboard = new Scoreboard(32, true)
|
||||
sboard.clear(ll_wen, ll_waddr)
|
||||
val id_sboard_hazard = checkHazards(hazard_targets, sboard.read _)
|
||||
sboard.set(wb_set_sboard && wb_wen, wb_waddr)
|
||||
|
||||
// stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
|
||||
val ex_cannot_bypass = ex_ctrl.csr =/= CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc
|
||||
val data_hazard_ex = ex_ctrl.wxd && checkHazards(hazard_targets, _ === ex_waddr)
|
||||
val fp_data_hazard_ex = ex_ctrl.wfd && checkHazards(fp_hazard_targets, _ === ex_waddr)
|
||||
val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex)
|
||||
|
||||
// stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage.
|
||||
val mem_mem_cmd_bh =
|
||||
if (fastLoadWord) Bool(!fastLoadByte) && mem_reg_slow_bypass
|
||||
else Bool(true)
|
||||
val mem_cannot_bypass = mem_ctrl.csr =/= CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc
|
||||
val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr)
|
||||
val fp_data_hazard_mem = mem_ctrl.wfd && checkHazards(fp_hazard_targets, _ === mem_waddr)
|
||||
val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem)
|
||||
id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem
|
||||
|
||||
// stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback.
|
||||
val data_hazard_wb = wb_ctrl.wxd && checkHazards(hazard_targets, _ === wb_waddr)
|
||||
val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr)
|
||||
val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb)
|
||||
|
||||
val id_stall_fpu = if (usingFPU) {
|
||||
val fp_sboard = new Scoreboard(32)
|
||||
fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr)
|
||||
fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr)
|
||||
fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra)
|
||||
|
||||
id_csr_en && !io.fpu.fcsr_rdy || checkHazards(fp_hazard_targets, fp_sboard.read _)
|
||||
} else Bool(false)
|
||||
|
||||
val dcache_blocked = Reg(Bool())
|
||||
dcache_blocked := !io.dmem.req.ready && (io.dmem.req.valid || dcache_blocked)
|
||||
val rocc_blocked = Reg(Bool())
|
||||
rocc_blocked := !wb_reg_xcpt && !io.rocc.cmd.ready && (io.rocc.cmd.valid || rocc_blocked)
|
||||
|
||||
val ctrl_stalld =
|
||||
id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard ||
|
||||
id_ctrl.fp && id_stall_fpu ||
|
||||
id_ctrl.mem && dcache_blocked || // reduce activity during D$ misses
|
||||
id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy
|
||||
id_do_fence ||
|
||||
csr.io.csr_stall
|
||||
ctrl_killd := !ibuf.io.inst(0).valid || ibuf.io.inst(0).bits.replay || take_pc || ctrl_stalld || csr.io.interrupt
|
||||
|
||||
io.imem.req.valid := take_pc
|
||||
io.imem.req.bits.speculative := !take_pc_wb
|
||||
io.imem.req.bits.pc :=
|
||||
Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret
|
||||
Mux(replay_wb, wb_reg_pc, // replay
|
||||
mem_npc)) // mispredicted branch
|
||||
io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
|
||||
io.imem.flush_tlb := csr.io.fatc
|
||||
|
||||
ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt
|
||||
|
||||
io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && (mem_cfi_taken || !mem_cfi) && mem_wrong_npc)
|
||||
io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi
|
||||
io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
|
||||
io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00??1")
|
||||
io.imem.btb_update.bits.target := io.imem.req.bits.pc
|
||||
io.imem.btb_update.bits.br_pc := (if (usingCompressed) mem_reg_pc + Mux(mem_reg_rvc, UInt(0), UInt(2)) else mem_reg_pc)
|
||||
io.imem.btb_update.bits.pc := ~(~io.imem.btb_update.bits.br_pc | (coreInstBytes*fetchWidth-1))
|
||||
io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
|
||||
io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
|
||||
|
||||
io.imem.bht_update.valid := mem_reg_valid && !take_pc_wb && mem_ctrl.branch
|
||||
io.imem.bht_update.bits.pc := io.imem.btb_update.bits.pc
|
||||
io.imem.bht_update.bits.taken := mem_br_taken
|
||||
io.imem.bht_update.bits.mispredict := mem_wrong_npc
|
||||
io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction
|
||||
|
||||
io.imem.ras_update.valid := mem_reg_valid && !take_pc_wb
|
||||
io.imem.ras_update.bits.returnAddr := mem_int_wdata
|
||||
io.imem.ras_update.bits.isCall := io.imem.btb_update.bits.isJump && mem_waddr(0)
|
||||
io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn
|
||||
io.imem.ras_update.bits.prediction := io.imem.btb_update.bits.prediction
|
||||
|
||||
io.fpu.valid := !ctrl_killd && id_ctrl.fp
|
||||
io.fpu.killx := ctrl_killx
|
||||
io.fpu.killm := killm_common
|
||||
io.fpu.inst := id_inst(0)
|
||||
io.fpu.fromint_data := ex_rs(0)
|
||||
io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
|
||||
io.fpu.dmem_resp_data := io.dmem.resp.bits.data_word_bypass
|
||||
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
|
||||
io.fpu.dmem_resp_tag := dmem_resp_waddr
|
||||
|
||||
io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem
|
||||
val ex_dcache_tag = Cat(ex_waddr, ex_ctrl.fp)
|
||||
require(coreDCacheReqTagBits >= ex_dcache_tag.getWidth)
|
||||
io.dmem.req.bits.tag := ex_dcache_tag
|
||||
io.dmem.req.bits.cmd := ex_ctrl.mem_cmd
|
||||
io.dmem.req.bits.typ := ex_ctrl.mem_type
|
||||
io.dmem.req.bits.phys := Bool(false)
|
||||
io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out)
|
||||
io.dmem.invalidate_lr := wb_xcpt
|
||||
io.dmem.s1_data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
|
||||
io.dmem.s1_kill := killm_common || mem_breakpoint
|
||||
when (mem_xcpt && !io.dmem.s1_kill) {
|
||||
assert(io.dmem.xcpt.asUInt.orR) // make sure s1_kill is exhaustive
|
||||
}
|
||||
|
||||
io.rocc.cmd.valid := wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
|
||||
io.rocc.exception := wb_xcpt && csr.io.status.xs.orR
|
||||
io.rocc.cmd.bits.status := csr.io.status
|
||||
io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst)
|
||||
io.rocc.cmd.bits.rs1 := wb_reg_wdata
|
||||
io.rocc.cmd.bits.rs2 := wb_reg_rs2
|
||||
|
||||
if (enableCommitLog) {
|
||||
val pc = Wire(SInt(width=xLen))
|
||||
pc := wb_reg_pc
|
||||
val inst = wb_reg_inst
|
||||
val rd = RegNext(RegNext(RegNext(id_waddr)))
|
||||
val wfd = wb_ctrl.wfd
|
||||
val wxd = wb_ctrl.wxd
|
||||
val has_data = wb_wen && !wb_set_sboard
|
||||
val priv = csr.io.status.prv
|
||||
|
||||
when (wb_valid) {
|
||||
when (wfd) {
|
||||
printf ("%d 0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd+UInt(32))
|
||||
}
|
||||
.elsewhen (wxd && rd =/= UInt(0) && has_data) {
|
||||
printf ("%d 0x%x (0x%x) x%d 0x%x\n", priv, pc, inst, rd, rf_wdata)
|
||||
}
|
||||
.elsewhen (wxd && rd =/= UInt(0) && !has_data) {
|
||||
printf ("%d 0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd)
|
||||
}
|
||||
.otherwise {
|
||||
printf ("%d 0x%x (0x%x)\n", priv, pc, inst)
|
||||
}
|
||||
}
|
||||
|
||||
when (ll_wen && rf_waddr =/= UInt(0)) {
|
||||
printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata)
|
||||
}
|
||||
}
|
||||
else {
|
||||
printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n",
|
||||
io.prci.id, csr.io.time(31,0), wb_valid, wb_reg_pc,
|
||||
Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen,
|
||||
wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))),
|
||||
wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))),
|
||||
wb_reg_inst, wb_reg_inst)
|
||||
}
|
||||
|
||||
// Folds a list of (exception-condition, cause-code) pairs into a single
// (any-exception-pending, selected-cause) pair. PriorityMux picks the cause of
// the first asserted condition, so earlier entries in the Seq take priority.
def checkExceptions(x: Seq[(Bool, UInt)]) =
  (x.map(_._1).reduce(_||_), PriorityMux(x))
|
||||
|
||||
// True when any enabled hazard target trips the supplied condition.
// Each target is (enable, register address); `cond` tests the address
// (e.g. equality against a downstream write address, or a scoreboard read).
def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) =
  targets
    .map { case (en, addr) => en && cond(addr) }
    .reduce(_ || _)
|
||||
|
||||
// Compresses a full-width virtual address into vaddrBits+1 bits, preserving
// enough information to detect non-canonical addresses later.
// `a0` is the raw source operand; `ea` is the computed effective address.
// When the extended and nominal VA widths match, the address passes through.
def encodeVirtualAddress(a0: UInt, ea: UInt) = if (vaddrBitsExtended == vaddrBits) ea else {
  // efficient means to compress 64-bit VA into vaddrBits+1 bits
  // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1))
  val a = a0 >> vaddrBits-1
  val e = ea(vaddrBits,vaddrBits-1).asSInt
  // Choose the encoded MSB from the base operand's high bits and the adder's
  // top two result bits, so (msb != bit vaddrBits-1) flags a bad VA downstream.
  val msb =
    Mux(a === UInt(0) || a === UInt(1), e =/= SInt(0),
    Mux(a.asSInt === SInt(-1) || a.asSInt === SInt(-2), e === SInt(-1), e(0)))
  Cat(msb, ea(vaddrBits-1,0))
}
|
||||
|
||||
// An n-entry busy-bit scoreboard used for register write-back tracking.
// If `zero` is true, entry 0 is forced clear (register x0 is never busy).
// set/clear may be called multiple times per cycle during elaboration; the
// effects are accumulated into `_next` and committed to the register when
// any enable fired.
class Scoreboard(n: Int, zero: Boolean = false)
{
  def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr))
  def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr))
  def read(addr: UInt): Bool = r(addr)
  // Reads this cycle's pending value (sees same-cycle set/clear calls made so far).
  def readBypassed(addr: UInt): Bool = _next(addr)

  private val _r = Reg(init=Bits(0, n))
  // Mask off bit 0 when `zero` is requested.
  private val r = if (zero) (_r >> 1 << 1) else _r
  // Elaboration-time accumulators: `_next` is the merged next value,
  // `ens` ORs together every enable seen so far.
  private var _next = r
  private var ens = Bool(false)
  private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0))
  private def update(en: Bool, update: UInt) = {
    _next = update
    ens = ens || en
    // Re-emitted on every call; last-connect semantics make the final
    // accumulated _next/ens win.
    when (ens) { _r := _next }
  }
}
|
||||
}
|
||||
166
src/main/scala/rocket/rvc.scala
Normal file
166
src/main/scala/rocket/rvc.scala
Normal file
@@ -0,0 +1,166 @@
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import Chisel.ImplicitConversions._
|
||||
import Util._
|
||||
import cde.Parameters
|
||||
import uncore.util._
|
||||
|
||||
// A 32-bit instruction produced by RVC expansion, bundled with its
// pre-decoded register specifier fields so downstream decode need not
// re-extract them.
class ExpandedInstruction extends Bundle {
  val bits = UInt(width = 32)
  val rd = UInt(width = 5)
  val rs1 = UInt(width = 5)
  val rs2 = UInt(width = 5)
  val rs3 = UInt(width = 5)
}
|
||||
|
||||
// Expands a 16-bit RISC-V compressed (RVC) instruction `x` into its 32-bit
// equivalent. Each quadrant (q0..q3) yields 8 candidate expansions indexed by
// funct3; `decode` selects among all 32 by {x(1,0), x(15,13)}.
class RVCDecoder(x: UInt)(implicit p: Parameters) {
  // Packs an expanded 32-bit encoding together with explicit register
  // specifiers (defaults pull them from the standard 32-bit field positions).
  def inst(bits: UInt, rd: UInt = x(11,7), rs1: UInt = x(19,15), rs2: UInt = x(24,20), rs3: UInt = x(31,27)) = {
    val res = Wire(new ExpandedInstruction)
    res.bits := bits
    res.rd := rd
    res.rs1 := rs1
    res.rs2 := rs2
    res.rs3 := rs3
    res
  }

  // Compressed register specifiers: 3-bit fields mapping to x8-x15.
  def rs1p = Cat(UInt(1,2), x(9,7))
  def rs2p = Cat(UInt(1,2), x(4,2))
  def rs2 = x(6,2)
  def rd = x(11,7)
  // Immediate extractors, one per RVC immediate format (scrambled bit order
  // per the compressed-ISA encoding tables).
  def addi4spnImm = Cat(x(10,7), x(12,11), x(5), x(6), UInt(0,2))
  def lwImm = Cat(x(5), x(12,10), x(6), UInt(0,2))
  def ldImm = Cat(x(6,5), x(12,10), UInt(0,3))
  def lwspImm = Cat(x(3,2), x(12), x(6,4), UInt(0,2))
  def ldspImm = Cat(x(4,2), x(12), x(6,5), UInt(0,3))
  def swspImm = Cat(x(8,7), x(12,9), UInt(0,2))
  def sdspImm = Cat(x(9,7), x(12,10), UInt(0,3))
  def luiImm = Cat(Fill(15, x(12)), x(6,2), UInt(0,12))
  def addi16spImm = Cat(Fill(3, x(12)), x(4,3), x(5), x(2), x(6), UInt(0,4))
  def addiImm = Cat(Fill(7, x(12)), x(6,2))
  def jImm = Cat(Fill(10, x(12)), x(8), x(10,9), x(6), x(7), x(2), x(11), x(5,3), UInt(0,1))
  def bImm = Cat(Fill(5, x(12)), x(6,5), x(2), x(11,10), x(4,3), UInt(0,1))
  def shamt = Cat(x(12), x(6,2))
  // Frequently-used fixed register specifiers.
  def x0 = UInt(0,5)
  def ra = UInt(1,5)
  def sp = UInt(2,5)

  // Quadrant 0: stack-pointer-relative allocation and loads/stores.
  def q0 = {
    def addi4spn = {
      // A zero immediate is reserved; expand it to an illegal opcode (0x1F).
      val opc = Mux(x(12,5).orR, UInt(0x13,7), UInt(0x1F,7))
      inst(Cat(addi4spnImm, sp, UInt(0,3), rs2p, opc), rs2p, sp, rs2p)
    }
    def ld = inst(Cat(ldImm, rs1p, UInt(3,3), rs2p, UInt(0x03,7)), rs2p, rs1p, rs2p)
    def lw = inst(Cat(lwImm, rs1p, UInt(2,3), rs2p, UInt(0x03,7)), rs2p, rs1p, rs2p)
    def fld = inst(Cat(ldImm, rs1p, UInt(3,3), rs2p, UInt(0x07,7)), rs2p, rs1p, rs2p)
    // C.FLW exists only on RV32; on RV64 the same encoding is C.LD.
    def flw = {
      if (p(XLen) == 32) inst(Cat(lwImm, rs1p, UInt(2,3), rs2p, UInt(0x07,7)), rs2p, rs1p, rs2p)
      else ld
    }
    def unimp = inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x2F,7)), rs2p, rs1p, rs2p)
    def sd = inst(Cat(ldImm >> 5, rs2p, rs1p, UInt(3,3), ldImm(4,0), UInt(0x23,7)), rs2p, rs1p, rs2p)
    def sw = inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x23,7)), rs2p, rs1p, rs2p)
    def fsd = inst(Cat(ldImm >> 5, rs2p, rs1p, UInt(3,3), ldImm(4,0), UInt(0x27,7)), rs2p, rs1p, rs2p)
    // C.FSW exists only on RV32; on RV64 the same encoding is C.SD.
    def fsw = {
      if (p(XLen) == 32) inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x27,7)), rs2p, rs1p, rs2p)
      else sd
    }
    Seq(addi4spn, fld, lw, flw, unimp, fsd, sw, fsw)
  }

  // Quadrant 1: immediate arithmetic, jumps, and branches.
  def q1 = {
    def addi = inst(Cat(addiImm, rd, UInt(0,3), rd, UInt(0x13,7)), rd, rd, rs2p)
    def addiw = {
      // rd == x0 is reserved for C.ADDIW; expand to an illegal opcode.
      val opc = Mux(rd.orR, UInt(0x1B,7), UInt(0x1F,7))
      inst(Cat(addiImm, rd, UInt(0,3), rd, opc), rd, rd, rs2p)
    }
    // C.JAL exists only on RV32; on RV64 the slot is C.ADDIW.
    def jal = {
      if (p(XLen) == 32) inst(Cat(jImm(20), jImm(10,1), jImm(11), jImm(19,12), ra, UInt(0x6F,7)), ra, rd, rs2p)
      else addiw
    }
    def li = inst(Cat(addiImm, x0, UInt(0,3), rd, UInt(0x13,7)), rd, x0, rs2p)
    def addi16sp = {
      val opc = Mux(addiImm.orR, UInt(0x13,7), UInt(0x1F,7))
      inst(Cat(addi16spImm, rd, UInt(0,3), rd, opc), rd, rd, rs2p)
    }
    def lui = {
      val opc = Mux(addiImm.orR, UInt(0x37,7), UInt(0x3F,7))
      val me = inst(Cat(luiImm(31,12), rd, opc), rd, rd, rs2p)
      // rd == x2 selects C.ADDI16SP rather than C.LUI.
      Mux(rd === x0 || rd === sp, addi16sp, me)
    }
    def j = inst(Cat(jImm(20), jImm(10,1), jImm(11), jImm(19,12), x0, UInt(0x6F,7)), x0, rs1p, rs2p)
    def beqz = inst(Cat(bImm(12), bImm(10,5), x0, rs1p, UInt(0,3), bImm(4,1), bImm(11), UInt(0x63,7)), rs1p, rs1p, x0)
    def bnez = inst(Cat(bImm(12), bImm(10,5), x0, rs1p, UInt(1,3), bImm(4,1), bImm(11), UInt(0x63,7)), x0, rs1p, x0)
    // Register-register / shift / mask ALU group, sub-decoded by x(11,10).
    def arith = {
      def srli = Cat(shamt, rs1p, UInt(5,3), rs1p, UInt(0x13,7))
      def srai = srli | UInt(1 << 30)
      def andi = Cat(addiImm, rs1p, UInt(7,3), rs1p, UInt(0x13,7))
      def rtype = {
        val funct = Seq(0.U, 4.U, 6.U, 7.U, 0.U, 0.U, 2.U, 3.U)(Cat(x(12), x(6,5)))
        val sub = Mux(x(6,5) === UInt(0), UInt(1 << 30), UInt(0))
        val opc = Mux(x(12), UInt(0x3B,7), UInt(0x33,7))
        Cat(rs2p, rs1p, funct, rs1p, opc) | sub
      }
      inst(Seq(srli, srai, andi, rtype)(x(11,10)), rs1p, rs1p, rs2p)
    }
    Seq(addi, jal, li, lui, arith, j, beqz, bnez)
  }

  // Quadrant 2: stack-pointer loads/stores, shifts, moves, and jumps via register.
  def q2 = {
    def slli = inst(Cat(shamt, rd, UInt(1,3), rd, UInt(0x13,7)), rd, rd, rs2)
    def ldsp = inst(Cat(ldspImm, sp, UInt(3,3), rd, UInt(0x03,7)), rd, sp, rs2)
    def lwsp = inst(Cat(lwspImm, sp, UInt(2,3), rd, UInt(0x03,7)), rd, sp, rs2)
    def fldsp = inst(Cat(ldspImm, sp, UInt(3,3), rd, UInt(0x07,7)), rd, sp, rs2)
    def flwsp = {
      if (p(XLen) == 32) inst(Cat(lwspImm, sp, UInt(2,3), rd, UInt(0x07,7)), rd, sp, rs2)
      else ldsp
    }
    def sdsp = inst(Cat(sdspImm >> 5, rs2, sp, UInt(3,3), sdspImm(4,0), UInt(0x23,7)), rd, sp, rs2)
    def swsp = inst(Cat(swspImm >> 5, rs2, sp, UInt(2,3), swspImm(4,0), UInt(0x23,7)), rd, sp, rs2)
    def fsdsp = inst(Cat(sdspImm >> 5, rs2, sp, UInt(3,3), sdspImm(4,0), UInt(0x27,7)), rd, sp, rs2)
    def fswsp = {
      if (p(XLen) == 32) inst(Cat(swspImm >> 5, rs2, sp, UInt(2,3), swspImm(4,0), UInt(0x27,7)), rd, sp, rs2)
      else sdsp
    }
    // One encoding covers C.JR/C.MV (x(12)=0) and C.JALR/C.ADD/C.EBREAK
    // (x(12)=1), disambiguated by whether rd/rs2 are zero.
    def jalr = {
      val mv = inst(Cat(rs2, x0, UInt(0,3), rd, UInt(0x33,7)), rd, x0, rs2)
      val add = inst(Cat(rs2, rd, UInt(0,3), rd, UInt(0x33,7)), rd, rd, rs2)
      val jr = Cat(rs2, rd, UInt(0,3), x0, UInt(0x67,7))
      val reserved = Cat(jr >> 7, UInt(0x1F,7))
      val jr_reserved = inst(Mux(rd.orR, jr, reserved), x0, rd, rs2)
      val jr_mv = Mux(rs2.orR, mv, jr_reserved)
      val jalr = Cat(rs2, rd, UInt(0,3), ra, UInt(0x67,7))
      val ebreak = Cat(jr >> 7, UInt(0x73,7)) | UInt(1 << 20)
      val jalr_ebreak = inst(Mux(rd.orR, jalr, ebreak), ra, rd, rs2)
      val jalr_add = Mux(rs2.orR, add, jalr_ebreak)
      Mux(x(12), jalr_add, jr_mv)
    }
    Seq(slli, fldsp, lwsp, flwsp, jalr, fsdsp, swsp, fswsp)
  }

  // Quadrant 3 (x(1,0) == 3) is already a 32-bit instruction: pass it through.
  def q3 = Seq.fill(8)(passthrough)

  def passthrough = inst(x)

  // Selects the expansion among all 32 candidates using quadrant and funct3.
  def decode = {
    val s = q0 ++ q1 ++ q2 ++ q3
    s(Cat(x(1,0), x(15,13)))
  }
}
|
||||
|
||||
// Module wrapper around RVCDecoder: expands a (possibly compressed)
// instruction and reports whether the input was compressed.
// When the C extension is disabled, the input passes through unchanged
// and `rvc` is hard-wired false.
class RVCExpander(implicit p: Parameters) extends Module {
  val io = new Bundle {
    val in = UInt(INPUT, 32)
    val out = new ExpandedInstruction
    val rvc = Bool(OUTPUT)
  }

  if (p(UseCompressed)) {
    // Any encoding whose low two bits are not 0b11 is a 16-bit RVC instruction.
    io.rvc := io.in(1,0) =/= UInt(3)
    io.out := new RVCDecoder(io.in).decode
  } else {
    io.rvc := Bool(false)
    io.out := new RVCDecoder(io.in).passthrough
  }
}
|
||||
134
src/main/scala/rocket/tile.scala
Normal file
134
src/main/scala/rocket/tile.scala
Normal file
@@ -0,0 +1,134 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.agents._
|
||||
import uncore.devices._
|
||||
import Util._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
case object BuildRoCC extends Field[Seq[RoccParameters]]
|
||||
case object NCachedTileLinkPorts extends Field[Int]
|
||||
case object NUncachedTileLinkPorts extends Field[Int]
|
||||
|
||||
// Configuration for one RoCC accelerator instance:
//   opcodes      - custom opcode set routed to this accelerator
//   generator    - factory building the accelerator module from Parameters
//   nMemChannels - number of uncached TileLink ports the accelerator needs
//   nPTWPorts    - number of page-table-walker ports the accelerator needs
//   useFPU       - whether the accelerator is wired to the tile's FPU
case class RoccParameters(
  opcodes: OpcodeSet,
  generator: Parameters => RoCC,
  nMemChannels: Int = 0,
  nPTWPorts : Int = 0,
  useFPU: Boolean = false)
|
||||
|
||||
// Base class for a processor tile. Optional explicit clock/reset allow the
// enclosing coreplex to drive each tile from its own domain (null means
// inherit the parent's). The IO exposes the tile's cached and uncached
// TileLink client ports plus the PRCI (power/reset/clock/interrupt) sink.
abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null)
                   (implicit p: Parameters) extends Module(Option(clockSignal), Option(resetSignal)) {
  val nCachedTileLinkPorts = p(NCachedTileLinkPorts)
  val nUncachedTileLinkPorts = p(NUncachedTileLinkPorts)
  // Parameter view with the L1 data cache selected as the active cache config.
  val dcacheParams = p.alterPartial({ case CacheName => "L1D" })

  class TileIO extends Bundle {
    val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO)
    val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO)
    val prci = new PRCITileIO().flip
  }

  val io = new TileIO
}
|
||||
|
||||
// A complete Rocket tile: core, L1 I$ frontend, L1 D$, optional FPU, optional
// RoCC accelerators, and the PTW, all arbitrated onto the tile's cached and
// uncached TileLink ports.
class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null)
                (implicit p: Parameters) extends Tile(clockSignal, resetSignal)(p) {
  val buildRocc = p(BuildRoCC)
  val usingRocc = !buildRocc.isEmpty
  val nRocc = buildRocc.size
  val nFPUPorts = buildRocc.filter(_.useFPU).size

  val core = Module(new Rocket)
  val icache = Module(new Frontend()(p.alterPartial({ case CacheName => "L1I" })))
  val dcache = HellaCache(p(DCacheKey))(dcacheParams)

  // Mutable port collections; RoCC/PTW wiring below appends to them before
  // they are finally arbitrated and connected to io.
  val ptwPorts = collection.mutable.ArrayBuffer(icache.io.ptw, dcache.ptw)
  val dcPorts = collection.mutable.ArrayBuffer(core.io.dmem)
  val uncachedArbPorts = collection.mutable.ArrayBuffer(icache.io.mem)
  val uncachedPorts = collection.mutable.ArrayBuffer[ClientUncachedTileLinkIO]()
  val cachedPorts = collection.mutable.ArrayBuffer(dcache.mem)
  core.io.prci <> io.prci
  icache.io.cpu <> core.io.imem

  val fpuOpt = p(FPUKey).map(cfg => Module(new FPU(cfg)))
  fpuOpt.foreach(fpu => core.io.fpu <> fpu.io)

  if (usingRocc) {
    // Route custom-opcode commands to the matching accelerator and
    // round-robin their responses back to the core.
    val respArb = Module(new RRArbiter(new RoCCResponse, nRocc))
    core.io.rocc.resp <> respArb.io.out

    val roccOpcodes = buildRocc.map(_.opcodes)
    val cmdRouter = Module(new RoccCommandRouter(roccOpcodes))
    cmdRouter.io.in <> core.io.rocc.cmd

    val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) =>
      val rocc = accelParams.generator(p.alterPartial({
        case RoccNMemChannels => accelParams.nMemChannels
        case RoccNPTWPorts => accelParams.nPTWPorts
      }))
      // Adapter giving the accelerator a simple D$ request interface.
      val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams))
      rocc.io.cmd <> cmdRouter.io.out(i)
      rocc.io.exception := core.io.rocc.exception
      dcIF.io.requestor <> rocc.io.mem
      dcPorts += dcIF.io.cache
      uncachedArbPorts += rocc.io.autl
      rocc
    }

    if (nFPUPorts > 0) {
      // Share the tile FPU among all FPU-using accelerators via an
      // in-order arbiter on the coprocessor port.
      fpuOpt.foreach { fpu =>
        val fpArb = Module(new InOrderArbiter(new FPInput, new FPResult, nFPUPorts))
        val fp_roccs = roccs.zip(buildRocc)
          .filter { case (_, params) => params.useFPU }
          .map { case (rocc, _) => rocc.io }
        fpArb.io.in_req <> fp_roccs.map(_.fpu_req)
        fp_roccs.zip(fpArb.io.in_resp).foreach {
          case (rocc, fpu_resp) => rocc.fpu_resp <> fpu_resp
        }
        fpu.io.cp_req <> fpArb.io.out_req
        fpArb.io.out_resp <> fpu.io.cp_resp
      }
    }

    core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _)
    core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _)
    respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp))

    ptwPorts ++= roccs.flatMap(_.io.ptw)
    uncachedPorts ++= roccs.flatMap(_.io.utl)
  }

  val uncachedArb = Module(new ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size))
  uncachedArb.io.in <> uncachedArbPorts
  uncachedArb.io.out +=: uncachedPorts

  // Connect the caches and RoCC to the outer memory system
  io.uncached <> uncachedPorts
  io.cached <> cachedPorts
  // TODO remove nCached/nUncachedTileLinkPorts parameters and these assertions
  require(uncachedPorts.size == nUncachedTileLinkPorts)
  require(cachedPorts.size == nCachedTileLinkPorts)

  if (p(UseVM)) {
    // The PTW serves all collected requestors and gets priority access
    // (prepended) to the D$ arbiter.
    val ptw = Module(new PTW(ptwPorts.size)(dcacheParams))
    ptw.io.requestor <> ptwPorts
    ptw.io.mem +=: dcPorts
    core.io.ptw <> ptw.io.dpath
  }

  require(dcPorts.size == core.dcacheArbPorts)
  val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams))
  dcArb.io.requestor <> dcPorts
  dcache.cpu <> dcArb.io.mem

  // With no accelerator FPU clients, tie off the FPU coprocessor port.
  if (nFPUPorts == 0) {
    fpuOpt.foreach { fpu =>
      fpu.io.cp_req.valid := Bool(false)
      fpu.io.cp_resp.ready := Bool(false)
    }
  }
}
|
||||
193
src/main/scala/rocket/tlb.scala
Normal file
193
src/main/scala/rocket/tlb.scala
Normal file
@@ -0,0 +1,193 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import Util._
|
||||
import junctions._
|
||||
import scala.math._
|
||||
import cde.{Parameters, Field}
|
||||
import uncore.agents.PseudoLRU
|
||||
import uncore.coherence._
|
||||
import uncore.util._
|
||||
|
||||
case object PgLevels extends Field[Int]
|
||||
case object ASIdBits extends Field[Int]
|
||||
case object NTLBEntries extends Field[Int]
|
||||
|
||||
// Derived TLB sizing: entry count from config, CAM address width, and the
// CAM tag width (ASID concatenated with the virtual page number).
trait HasTLBParameters extends HasCoreParameters {
  val entries = p(NTLBEntries)
  val camAddrBits = log2Ceil(entries)
  val camTagBits = asIdBits + vpnBits
}
|
||||
|
||||
// TLB lookup request: the (extended) virtual page number plus qualifiers —
// passthrough bypasses translation, instruction marks an I-fetch lookup,
// store marks a write access (for dirty-bit checking).
class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
  val vpn = UInt(width = vpnBitsExtended)
  val passthrough = Bool()
  val instruction = Bool()
  val store = Bool()
}
|
||||
|
||||
// TLB lookup response: translated PPN, a miss indication, per-access-type
// permission exceptions, and whether the target region is cacheable.
class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
  // lookup responses
  val miss = Bool(OUTPUT)
  val ppn = UInt(OUTPUT, ppnBits)
  val xcpt_ld = Bool(OUTPUT)
  val xcpt_st = Bool(OUTPUT)
  val xcpt_if = Bool(OUTPUT)
  val cacheable = Bool(OUTPUT)
}
|
||||
|
||||
// A fully-associative TLB with pseudo-LRU replacement. Misses are refilled
// through the PTW port via a small ready/request/wait state machine; any PTW
// invalidate flash-clears all entries. A physical-memory-attribute check is
// shared between refill and passthrough paths.
class TLB(implicit val p: Parameters) extends Module with HasTLBParameters {
  val io = new Bundle {
    val req = Decoupled(new TLBReq).flip
    val resp = new TLBResp
    val ptw = new TLBPTWIO
  }

  // Entry storage: valid bitmap plus per-entry PPN and ASID+VPN tag.
  val valid = Reg(init = UInt(0, entries))
  val ppns = Reg(Vec(entries, UInt(width = ppnBits)))
  val tags = Reg(Vec(entries, UInt(width = asIdBits + vpnBits)))

  // Refill state machine; *_invalidate states remember that an invalidate
  // arrived while a walk was in flight.
  val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4)
  val state = Reg(init=s_ready)
  val r_refill_tag = Reg(UInt(width = asIdBits + vpnBits))
  val r_refill_waddr = Reg(UInt(width = log2Ceil(entries)))
  val r_req = Reg(new TLBReq)

  // Effective privilege: MPRV redirects data accesses to MPP's privilege.
  val do_mprv = io.ptw.status.mprv && !io.req.bits.instruction
  val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv)
  val priv_s = priv === PRV.S
  val priv_uses_vm = priv <= PRV.S && !io.ptw.status.debug

  // share a single physical memory attribute checker (unshare if critical path)
  val passthrough_ppn = io.req.bits.vpn(ppnBits-1, 0)
  val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0)
  val do_refill = Bool(usingVM) && io.ptw.resp.valid
  val mpu_ppn = Mux(do_refill, refill_ppn, passthrough_ppn)
  val prot = addrMap.getProt(mpu_ppn << pgIdxBits)
  val cacheable = addrMap.isCacheable(mpu_ppn << pgIdxBits)
  def pgaligned(r: MemRegion) = {
    val pgsize = 1 << pgIdxBits
    (r.start % pgsize) == 0 && (r.size % pgsize) == 0
  }
  require(addrMap.flatten.forall(e => pgaligned(e.region)),
    "MemoryMap regions must be page-aligned")

  // CAM lookup; the extra appended element makes passthrough (VM off)
  // behave as a hit on a virtual "identity" entry.
  val lookup_tag = Cat(io.ptw.ptbr.asid, io.req.bits.vpn(vpnBits-1,0))
  val vm_enabled = Bool(usingVM) && io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough
  val hitsVec = (0 until entries).map(i => valid(i) && vm_enabled && tags(i) === lookup_tag) :+ !vm_enabled
  val hits = hitsVec.asUInt

  // permission bit arrays
  val pte_array = Reg(new PTE)
  val u_array = Reg(UInt(width = entries)) // user permission
  val sw_array = Reg(UInt(width = entries)) // write permission
  val sx_array = Reg(UInt(width = entries)) // execute permission
  val sr_array = Reg(UInt(width = entries)) // read permission
  val xr_array = Reg(UInt(width = entries)) // read permission to executable page
  val cash_array = Reg(UInt(width = entries)) // cacheable
  val dirty_array = Reg(UInt(width = entries)) // PTE dirty bit
  when (do_refill) {
    // Install the walked PTE and update each permission bitmap at the
    // victim slot (PTE permission gated by physical protection).
    val pte = io.ptw.resp.bits.pte
    ppns(r_refill_waddr) := pte.ppn
    tags(r_refill_waddr) := r_refill_tag

    val mask = UIntToOH(r_refill_waddr)
    valid := valid | mask
    u_array := Mux(pte.u, u_array | mask, u_array & ~mask)
    sw_array := Mux(pte.sw() && prot.w, sw_array | mask, sw_array & ~mask)
    sx_array := Mux(pte.sx() && prot.x, sx_array | mask, sx_array & ~mask)
    sr_array := Mux(pte.sr() && prot.r, sr_array | mask, sr_array & ~mask)
    xr_array := Mux(pte.sx() && prot.r, xr_array | mask, xr_array & ~mask)
    cash_array := Mux(cacheable, cash_array | mask, cash_array & ~mask)
    dirty_array := Mux(pte.d, dirty_array | mask, dirty_array & ~mask)
  }

  // Prefer an invalid slot for refill; otherwise evict the PLRU victim.
  val plru = new PseudoLRU(entries)
  val repl_waddr = Mux(!valid.andR, PriorityEncoder(~valid), plru.replace)

  // Effective permissions at current privilege; PUM blocks S-mode access to
  // user pages, MXR lets loads use execute-only pages. The extra MSB covers
  // the passthrough pseudo-entry using the physical protection bits.
  val priv_ok = Mux(priv_s, ~Mux(io.ptw.status.pum, u_array, UInt(0)), u_array)
  val w_array = Cat(prot.w, priv_ok & sw_array)
  val x_array = Cat(prot.x, priv_ok & sx_array)
  val r_array = Cat(prot.r, priv_ok & (sr_array | Mux(io.ptw.status.mxr, xr_array, UInt(0))))
  val c_array = Cat(cacheable, cash_array)

  // Non-canonical VA check on the extended bits.
  val bad_va =
    if (vpnBits == vpnBitsExtended) Bool(false)
    else io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1)
  // it's only a store hit if the dirty bit is set
  val tlb_hits = hits(entries-1, 0) & (dirty_array | ~Mux(io.req.bits.store, w_array, UInt(0)))
  val tlb_hit = tlb_hits.orR
  val tlb_miss = vm_enabled && !bad_va && !tlb_hit

  when (io.req.valid && !tlb_miss) {
    plru.access(OHToUInt(hits(entries-1, 0)))
  }

  io.req.ready := state === s_ready
  io.resp.xcpt_ld := bad_va || (~r_array & hits).orR
  io.resp.xcpt_st := bad_va || (~w_array & hits).orR
  io.resp.xcpt_if := bad_va || (~x_array & hits).orR
  io.resp.cacheable := (c_array & hits).orR
  io.resp.miss := do_refill || tlb_miss
  io.resp.ppn := Mux1H(hitsVec, ppns :+ passthrough_ppn)

  io.ptw.req.valid := state === s_request
  io.ptw.req.bits := io.ptw.status
  io.ptw.req.bits.addr := r_refill_tag
  io.ptw.req.bits.store := r_req.store
  io.ptw.req.bits.fetch := r_req.instruction

  if (usingVM) {
    // Launch a walk on a miss, capturing the tag, victim slot, and request.
    when (io.req.fire() && tlb_miss) {
      state := s_request
      r_refill_tag := lookup_tag
      r_refill_waddr := repl_waddr
      r_req := io.req.bits
    }
    when (state === s_request) {
      when (io.ptw.invalidate) {
        state := s_ready
      }
      when (io.ptw.req.ready) {
        state := s_wait
        when (io.ptw.invalidate) { state := s_wait_invalidate }
      }
    }
    when (state === s_wait && io.ptw.invalidate) {
      state := s_wait_invalidate
    }
    when (io.ptw.resp.valid) {
      state := s_ready
    }

    // Flash-invalidate all entries (e.g. on SFENCE.VM).
    when (io.ptw.invalidate) {
      valid := 0
    }
  }
}
|
||||
|
||||
// Wraps the combinational-response TLB in fully decoupled request/response
// queues: a request is consumed and a response emitted only when the TLB
// hits; on a miss the request is replayed until the refill completes.
class DecoupledTLB(implicit p: Parameters) extends Module {
  val io = new Bundle {
    val req = Decoupled(new TLBReq).flip
    val resp = Decoupled(new TLBResp)
    val ptw = new TLBPTWIO
  }

  val reqq = Queue(io.req)
  val tlb = Module(new TLB)

  // Fire only when the queued request, the TLB, and the response consumer
  // are all ready simultaneously.
  val resp_helper = DecoupledHelper(
    reqq.valid, tlb.io.req.ready, io.resp.ready)
  val tlb_miss = tlb.io.resp.miss

  tlb.io.req.valid := resp_helper.fire(tlb.io.req.ready)
  tlb.io.req.bits := reqq.bits
  // Dequeue (and respond) only on a hit; a missing request stays queued.
  reqq.ready := resp_helper.fire(reqq.valid, !tlb_miss)

  io.resp.valid := resp_helper.fire(io.resp.ready, !tlb_miss)
  io.resp.bits := tlb.io.resp

  io.ptw <> tlb.io.ptw
}
|
||||
159
src/main/scala/rocket/util.scala
Normal file
159
src/main/scala/rocket/util.scala
Normal file
@@ -0,0 +1,159 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package rocket
|
||||
|
||||
import Chisel._
|
||||
import uncore.util._
|
||||
import scala.math._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
// Implicit conversions and extension methods shared across the rocket
// package. NOTE(review): these broad implicit conversions (Int/BigInt/Boolean
// to hardware types) are convenient but can mask type errors at call sites.
object Util {
  implicit def uintToBitPat(x: UInt): BitPat = BitPat(x)
  implicit def intToUInt(x: Int): UInt = UInt(x)
  implicit def bigIntToUInt(x: BigInt): UInt = UInt(x)
  implicit def booleanToBool(x: Boolean): Bits = Bool(x)
  implicit def intSeqToUIntSeq(x: Seq[Int]): Seq[UInt] = x.map(UInt(_))
  implicit def wcToUInt(c: WideCounter): UInt = c.value

  implicit class UIntToAugmentedUInt(val x: UInt) extends AnyVal {
    // Sign-extends x to n bits (no-op when already n bits wide).
    def sextTo(n: Int): UInt =
      if (x.getWidth == n) x
      else Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x)

    // Bit-slice that tolerates an empty range (hi == lo-1 yields 0).
    def extract(hi: Int, lo: Int): UInt = {
      if (hi == lo-1) UInt(0)
      else x(hi, lo)
    }
  }

  // Allows `cond.toInt` on a Boolean (1 for true, 0 for false).
  implicit def booleanToIntConv(x: Boolean) = new AnyRef {
    def toInt: Int = if (x) 1 else 0
  }
}
|
||||
|
||||
import Util._
|
||||
|
||||
// Builds hardware constants and circuits that render values as ASCII text,
// used for simulation printf output. Strings/chars become packed byte
// constants; UInt/SInt overloads generate radix-conversion hardware.
object Str
{
  // Packs an ASCII string into a UInt, first character in the most
  // significant byte.
  def apply(s: String): UInt = {
    var i = BigInt(0)
    require(s.forall(validChar _))
    for (c <- s)
      i = (i << 8) | c
    UInt(i, s.length*8)
  }
  def apply(x: Char): UInt = {
    require(validChar(x))
    UInt(x.toInt, 8)
  }
  def apply(x: UInt): UInt = apply(x, 10)
  // Renders an unsigned value in the given radix; for radix 10, leading
  // zeros are replaced by spaces.
  def apply(x: UInt, radix: Int): UInt = {
    val rad = UInt(radix)
    val w = x.getWidth
    require(w > 0)

    var q = x
    var s = digit(q % rad)
    // ceil(log(2)/log(radix) * w) bounds the number of digits needed.
    for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) {
      q = q / rad
      s = Cat(Mux(Bool(radix == 10) && q === UInt(0), Str(' '), digit(q % rad)), s)
    }
    s
  }
  def apply(x: SInt): UInt = apply(x, 10)
  // Signed rendering: non-decimal radices get a fixed sign column; decimal
  // floats the '-' immediately left of the most significant digit.
  def apply(x: SInt, radix: Int): UInt = {
    val neg = x < SInt(0)
    val abs = x.abs
    if (radix != 10) {
      Cat(Mux(neg, Str('-'), Str(' ')), Str(abs, radix))
    } else {
      val rad = UInt(radix)
      val w = abs.getWidth
      require(w > 0)

      var q = abs
      var s = digit(q % rad)
      var needSign = neg
      for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) {
        q = q / rad
        val placeSpace = q === UInt(0)
        val space = Mux(needSign, Str('-'), Str(' '))
        needSign = needSign && !placeSpace
        s = Cat(Mux(placeSpace, space, digit(q % rad)), s)
      }
      Cat(Mux(needSign, Str('-'), Str(' ')), s)
    }
  }

  // Maps a digit value to its ASCII character ('0'-'9', then 'a'...).
  private def digit(d: UInt): UInt = Mux(d < UInt(10), Str('0')+d, Str(('a'-10).toChar)+d)(7,0)
  // Only single-byte (Latin-1 range) characters are representable.
  private def validChar(x: Char) = x == (x & 0xFF)
}
|
||||
|
||||
// Splits a Bits value into contiguous fields at the given bit positions,
// returning the fields most-significant first. Split points must be given
// in decreasing order and lie within the value's width.
object Split
{
  // is there a better way to do do this?
  def apply(x: Bits, n0: Int) = {
    val w = checkWidth(x, n0)
    (x(w-1, n0), x(n0-1, 0))
  }
  def apply(x: Bits, n1: Int, n0: Int) = {
    val w = checkWidth(x, n1, n0)
    (x(w-1, n1), x(n1-1, n0), x(n0-1, 0))
  }
  def apply(x: Bits, n2: Int, n1: Int, n0: Int) = {
    val w = checkWidth(x, n2, n1, n0)
    (x(w-1, n2), x(n2-1, n1), x(n1-1, n0), x(n0-1, 0))
  }

  // Validates that width >= n(0) >= n(1) >= ... and returns the width.
  private def checkWidth(x: Bits, n: Int*) = {
    val w = x.getWidth
    val bounds = w +: n
    require(bounds.zip(bounds.tail).forall { case (hi, lo) => hi >= lo })
    w
  }
}
|
||||
|
||||
// a counter that clock gates most of its MSBs using the LSB carry-out
|
||||
case class WideCounter(width: Int, inc: UInt = UInt(1))
|
||||
{
|
||||
private val isWide = width > 2*inc.getWidth
|
||||
private val smallWidth = if (isWide) inc.getWidth max log2Up(width) else width
|
||||
private val small = Reg(init=UInt(0, smallWidth))
|
||||
private val nextSmall = small +& inc
|
||||
small := nextSmall
|
||||
|
||||
private val large = if (isWide) {
|
||||
val r = Reg(init=UInt(0, width - smallWidth))
|
||||
when (nextSmall(smallWidth)) { r := r + UInt(1) }
|
||||
r
|
||||
} else null
|
||||
|
||||
val value = if (isWide) Cat(large, small) else small
|
||||
|
||||
def := (x: UInt) = {
|
||||
small := x
|
||||
if (isWide) large := x >> smallWidth
|
||||
}
|
||||
}
|
||||
|
||||
object Random
|
||||
{
|
||||
def apply(mod: Int, random: UInt): UInt = {
|
||||
if (isPow2(mod)) random(log2Up(mod)-1,0)
|
||||
else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod))
|
||||
}
|
||||
def apply(mod: Int): UInt = apply(mod, randomizer)
|
||||
def oneHot(mod: Int, random: UInt): UInt = {
|
||||
if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0))
|
||||
else PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod)).asUInt
|
||||
}
|
||||
def oneHot(mod: Int): UInt = oneHot(mod, randomizer)
|
||||
|
||||
private def randomizer = LFSR16()
|
||||
private def round(x: Double): Int =
|
||||
if (x.toInt.toDouble == x) x.toInt else (x.toInt + 1) & -2
|
||||
private def partition(value: UInt, slices: Int) =
|
||||
Seq.tabulate(slices)(i => value < round((i << value.getWidth).toDouble / slices))
|
||||
}
|
||||
117
src/main/scala/uncore/Builder.scala
Normal file
117
src/main/scala/uncore/Builder.scala
Normal file
@@ -0,0 +1,117 @@
|
||||
package uncore
|
||||
|
||||
import Chisel._
|
||||
import cde.{Config, Parameters, ParameterDump, Knob, Dump}
|
||||
import junctions.PAddrBits
|
||||
import uncore.tilelink._
|
||||
import uncore.agents._
|
||||
import uncore.coherence._
|
||||
|
||||
object UncoreBuilder extends App {
|
||||
val topModuleName = args(0)
|
||||
val configClassName = args(1)
|
||||
val config = try {
|
||||
Class.forName(s"uncore.$configClassName").newInstance.asInstanceOf[Config]
|
||||
} catch {
|
||||
case e: java.lang.ClassNotFoundException =>
|
||||
throwException("Unable to find configClassName \"" + configClassName +
|
||||
"\", did you misspell it?", e)
|
||||
}
|
||||
val world = config.toInstance
|
||||
val paramsFromConfig: Parameters = Parameters.root(world)
|
||||
|
||||
val gen = () =>
|
||||
Class.forName(s"uncore.$topModuleName")
|
||||
.getConstructor(classOf[cde.Parameters])
|
||||
.newInstance(paramsFromConfig)
|
||||
.asInstanceOf[Module]
|
||||
|
||||
chiselMain.run(args.drop(2), gen)
|
||||
|
||||
val pdFile = new java.io.FileWriter(s"${Driver.targetDir}/$topModuleName.prm")
|
||||
pdFile.write(ParameterDump.getDump)
|
||||
pdFile.close
|
||||
|
||||
}
|
||||
|
||||
class DefaultL2Config extends Config (
|
||||
topDefinitions = { (pname,site,here) =>
|
||||
pname match {
|
||||
case PAddrBits => 32
|
||||
case CacheId => 0
|
||||
case CacheName => "L2Bank"
|
||||
case TLId => "L1toL2"
|
||||
case InnerTLId => "L1toL2"
|
||||
case OuterTLId => "L2toMC"
|
||||
case "N_CACHED" => Dump("N_CACHED",here[Int]("CACHED_CLIENTS_PER_PORT"))
|
||||
case "N_UNCACHED" => Dump("N_UNCACHED",here[Int]("MAX_CLIENTS_PER_PORT") - here[Int]("N_CACHED"))
|
||||
case "MAX_CLIENT_XACTS" => 4
|
||||
case "MAX_CLIENTS_PER_PORT" => Knob("NTILES")
|
||||
case "CACHED_CLIENTS_PER_PORT" => Knob("N_CACHED_TILES")
|
||||
case TLKey("L1toL2") =>
|
||||
TileLinkParameters(
|
||||
coherencePolicy = new MESICoherence(site(L2DirectoryRepresentation)),
|
||||
nManagers = 1,
|
||||
nCachingClients = here[Int]("N_CACHED"),
|
||||
nCachelessClients = here[Int]("N_UNCACHED"),
|
||||
maxClientXacts = here[Int]("MAX_CLIENT_XACTS"),
|
||||
maxClientsPerPort = here[Int]("MAX_CLIENTS_PER_PORT"),
|
||||
maxManagerXacts = site(NAcquireTransactors) + 2,
|
||||
dataBits = site(CacheBlockBytes)*8,
|
||||
dataBeats = 2)
|
||||
case TLKey("L2toMC") =>
|
||||
TileLinkParameters(
|
||||
coherencePolicy = new MEICoherence(new NullRepresentation(1)),
|
||||
nManagers = 1,
|
||||
nCachingClients = 1,
|
||||
nCachelessClients = 0,
|
||||
maxClientXacts = 1,
|
||||
maxClientsPerPort = site(NAcquireTransactors) + 2,
|
||||
maxManagerXacts = 1,
|
||||
dataBits = site(CacheBlockBytes)*8,
|
||||
dataBeats = 2)
|
||||
case CacheBlockBytes => 64
|
||||
case CacheBlockOffsetBits => log2Up(here(CacheBlockBytes))
|
||||
case "L2_SETS" => Knob("L2_SETS")
|
||||
case NSets => Dump("L2_SETS",here[Int]("L2_SETS"))
|
||||
case NWays => Knob("L2_WAYS")
|
||||
case RowBits => site(TLKey(site(TLId))).dataBitsPerBeat
|
||||
case CacheIdBits => Dump("CACHE_ID_BITS",1)
|
||||
case L2StoreDataQueueDepth => 1
|
||||
case NAcquireTransactors => Dump("N_ACQUIRE_TRANSACTORS",2)
|
||||
case NSecondaryMisses => 4
|
||||
case L2DirectoryRepresentation => new FullRepresentation(here[Int]("N_CACHED"))
|
||||
case L2Replacer => () => new SeqRandom(site(NWays))
|
||||
case ECCCode => None
|
||||
case AmoAluOperandBits => 64
|
||||
case SplitMetadata => false
|
||||
// case XLen => 128
|
||||
}},
|
||||
knobValues = {
|
||||
case "L2_WAYS" => 1
|
||||
case "L2_SETS" => 1024
|
||||
case "NTILES" => 2
|
||||
case "N_CACHED_TILES" => 2
|
||||
case "L2_CAPACITY_IN_KB" => 256
|
||||
}
|
||||
)
|
||||
|
||||
class WithPLRU extends Config(
|
||||
(pname, site, here) => pname match {
|
||||
case L2Replacer => () => new SeqPLRU(site(NSets), site(NWays))
|
||||
})
|
||||
|
||||
class PLRUL2Config extends Config(new WithPLRU ++ new DefaultL2Config)
|
||||
|
||||
class With1L2Ways extends Config(knobValues = { case "L2_WAYS" => 1 })
|
||||
class With2L2Ways extends Config(knobValues = { case "L2_WAYS" => 2 })
|
||||
class With4L2Ways extends Config(knobValues = { case "L2_WAYS" => 4 })
|
||||
|
||||
class With1Cached extends Config(knobValues = { case "N_CACHED_TILES" => 1 })
|
||||
class With2Cached extends Config(knobValues = { case "N_CACHED_TILES" => 2 })
|
||||
|
||||
|
||||
class W1Cached1WaysConfig extends Config(new With1L2Ways ++ new With1Cached ++ new DefaultL2Config)
|
||||
class W1Cached2WaysConfig extends Config(new With2L2Ways ++ new With1Cached ++ new DefaultL2Config)
|
||||
class W2Cached1WaysConfig extends Config(new With1L2Ways ++ new With2Cached ++ new DefaultL2Config)
|
||||
class W2Cached2WaysConfig extends Config(new With2L2Ways ++ new With2Cached ++ new DefaultL2Config)
|
||||
39
src/main/scala/uncore/Consts.scala
Normal file
39
src/main/scala/uncore/Consts.scala
Normal file
@@ -0,0 +1,39 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore
|
||||
package constants
|
||||
|
||||
import Chisel._
|
||||
|
||||
object MemoryOpConstants extends MemoryOpConstants
|
||||
trait MemoryOpConstants {
|
||||
val NUM_XA_OPS = 9
|
||||
val M_SZ = 5
|
||||
val M_X = BitPat("b?????");
|
||||
val M_XRD = UInt("b00000"); // int load
|
||||
val M_XWR = UInt("b00001"); // int store
|
||||
val M_PFR = UInt("b00010"); // prefetch with intent to read
|
||||
val M_PFW = UInt("b00011"); // prefetch with intent to write
|
||||
val M_XA_SWAP = UInt("b00100");
|
||||
val M_FLUSH_ALL = UInt("b00101") // flush all lines
|
||||
val M_XLR = UInt("b00110");
|
||||
val M_XSC = UInt("b00111");
|
||||
val M_XA_ADD = UInt("b01000");
|
||||
val M_XA_XOR = UInt("b01001");
|
||||
val M_XA_OR = UInt("b01010");
|
||||
val M_XA_AND = UInt("b01011");
|
||||
val M_XA_MIN = UInt("b01100");
|
||||
val M_XA_MAX = UInt("b01101");
|
||||
val M_XA_MINU = UInt("b01110");
|
||||
val M_XA_MAXU = UInt("b01111");
|
||||
val M_FLUSH = UInt("b10000") // write back dirty data and cede R/W permissions
|
||||
val M_PRODUCE = UInt("b10001") // write back dirty data and cede W permissions
|
||||
val M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions
|
||||
|
||||
def isAMO(cmd: UInt) = cmd(3) || cmd === M_XA_SWAP
|
||||
def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW
|
||||
def isRead(cmd: UInt) = cmd === M_XRD || cmd === M_XLR || cmd === M_XSC || isAMO(cmd)
|
||||
def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_XSC || isAMO(cmd)
|
||||
def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR
|
||||
}
|
||||
|
||||
4
src/main/scala/uncore/Package.scala
Normal file
4
src/main/scala/uncore/Package.scala
Normal file
@@ -0,0 +1,4 @@
|
||||
// See LICENSE for license details.
|
||||
package uncore
|
||||
|
||||
package object constants extends uncore.constants.MemoryOpConstants
|
||||
162
src/main/scala/uncore/agents/Agents.scala
Normal file
162
src/main/scala/uncore/agents/Agents.scala
Normal file
@@ -0,0 +1,162 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.agents
|
||||
|
||||
import Chisel._
|
||||
import cde.{Parameters, Field}
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import uncore.converters._
|
||||
import uncore.coherence._
|
||||
import uncore.util._
|
||||
|
||||
case object NReleaseTransactors extends Field[Int]
|
||||
case object NProbeTransactors extends Field[Int]
|
||||
case object NAcquireTransactors extends Field[Int]
|
||||
|
||||
trait HasCoherenceAgentParameters {
|
||||
implicit val p: Parameters
|
||||
val nReleaseTransactors = 1
|
||||
val nAcquireTransactors = p(NAcquireTransactors)
|
||||
val nTransactors = nReleaseTransactors + nAcquireTransactors
|
||||
val blockAddrBits = p(PAddrBits) - p(CacheBlockOffsetBits)
|
||||
val outerTLId = p(OuterTLId)
|
||||
val outerTLParams = p(TLKey(outerTLId))
|
||||
val outerDataBeats = outerTLParams.dataBeats
|
||||
val outerDataBits = outerTLParams.dataBitsPerBeat
|
||||
val outerBeatAddrBits = log2Up(outerDataBeats)
|
||||
val outerByteAddrBits = log2Up(outerDataBits/8)
|
||||
val outerWriteMaskBits = outerTLParams.writeMaskBits
|
||||
val innerTLId = p(InnerTLId)
|
||||
val innerTLParams = p(TLKey(innerTLId))
|
||||
val innerDataBeats = innerTLParams.dataBeats
|
||||
val innerDataBits = innerTLParams.dataBitsPerBeat
|
||||
val innerWriteMaskBits = innerTLParams.writeMaskBits
|
||||
val innerBeatAddrBits = log2Up(innerDataBeats)
|
||||
val innerByteAddrBits = log2Up(innerDataBits/8)
|
||||
val innerNCachingClients = innerTLParams.nCachingClients
|
||||
val maxManagerXacts = innerTLParams.maxManagerXacts
|
||||
require(outerDataBeats == innerDataBeats) //TODO: fix all xact_data Vecs to remove this requirement
|
||||
}
|
||||
|
||||
abstract class CoherenceAgentModule(implicit val p: Parameters) extends Module
|
||||
with HasCoherenceAgentParameters
|
||||
abstract class CoherenceAgentBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p)
|
||||
with HasCoherenceAgentParameters
|
||||
|
||||
trait HasCoherenceAgentWiringHelpers {
|
||||
def doOutputArbitration[T <: TileLinkChannel](
|
||||
out: DecoupledIO[T],
|
||||
ins: Seq[DecoupledIO[T]]) {
|
||||
def lock(o: T) = o.hasMultibeatData()
|
||||
val arb = Module(new LockingRRArbiter(out.bits, ins.size, out.bits.tlDataBeats, Some(lock _)))
|
||||
out <> arb.io.out
|
||||
arb.io.in <> ins
|
||||
}
|
||||
|
||||
def doInputRouting[T <: Bundle with HasManagerTransactionId](
|
||||
in: DecoupledIO[T],
|
||||
outs: Seq[DecoupledIO[T]]) {
|
||||
val idx = in.bits.manager_xact_id
|
||||
outs.map(_.bits := in.bits)
|
||||
outs.zipWithIndex.map { case (o,i) => o.valid := in.valid && idx === UInt(i) }
|
||||
in.ready := outs.map(_.ready).apply(idx)
|
||||
}
|
||||
|
||||
/** Broadcasts valid messages on this channel to all trackers,
|
||||
* but includes logic to allocate a new tracker in the case where
|
||||
* no previously allocated tracker matches the new req's addr.
|
||||
*
|
||||
* When a match is reported, if ready is high the new transaction
|
||||
* is merged; when ready is low the transaction is being blocked.
|
||||
* When no match is reported, any high idles are presumed to be
|
||||
* from trackers that are available for allocation, and one is
|
||||
* assigned via alloc based on priority; if no idles are high then
|
||||
* all trackers are busy with other transactions. If idle is high
|
||||
* but ready is low, the tracker will be allocated but does not
|
||||
* have sufficient buffering for the data.
|
||||
*/
|
||||
def doInputRoutingWithAllocation[T <: TileLinkChannel with HasTileLinkData](
|
||||
in: DecoupledIO[T],
|
||||
outs: Seq[DecoupledIO[T]],
|
||||
allocs: Seq[TrackerAllocation],
|
||||
dataOverrides: Option[Seq[UInt]] = None,
|
||||
allocOverride: Option[Bool] = None,
|
||||
matchOverride: Option[Bool] = None) {
|
||||
val ready_bits = outs.map(_.ready).asUInt
|
||||
val can_alloc_bits = allocs.map(_.can).asUInt
|
||||
val should_alloc_bits = PriorityEncoderOH(can_alloc_bits)
|
||||
val match_bits = allocs.map(_.matches).asUInt
|
||||
val no_matches = !match_bits.orR
|
||||
val alloc_ok = allocOverride.getOrElse(Bool(true))
|
||||
val match_ok = matchOverride.getOrElse(Bool(true))
|
||||
in.ready := (Mux(no_matches, can_alloc_bits, match_bits) & ready_bits).orR && alloc_ok && match_ok
|
||||
outs.zip(allocs).zipWithIndex.foreach { case((out, alloc), i) =>
|
||||
out.valid := in.valid && match_ok && alloc_ok
|
||||
out.bits := in.bits
|
||||
dataOverrides foreach { d => out.bits.data := d(i) }
|
||||
alloc.should := should_alloc_bits(i) && no_matches && alloc_ok
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
trait HasInnerTLIO extends HasCoherenceAgentParameters {
|
||||
val inner = new ManagerTileLinkIO()(p.alterPartial({case TLId => p(InnerTLId)}))
|
||||
val incoherent = Vec(inner.tlNCachingClients, Bool()).asInput
|
||||
def iacq(dummy: Int = 0) = inner.acquire.bits
|
||||
def iprb(dummy: Int = 0) = inner.probe.bits
|
||||
def irel(dummy: Int = 0) = inner.release.bits
|
||||
def ignt(dummy: Int = 0) = inner.grant.bits
|
||||
def ifin(dummy: Int = 0) = inner.finish.bits
|
||||
}
|
||||
|
||||
trait HasUncachedOuterTLIO extends HasCoherenceAgentParameters {
|
||||
val outer = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => p(OuterTLId)}))
|
||||
def oacq(dummy: Int = 0) = outer.acquire.bits
|
||||
def ognt(dummy: Int = 0) = outer.grant.bits
|
||||
}
|
||||
|
||||
trait HasCachedOuterTLIO extends HasCoherenceAgentParameters {
|
||||
val outer = new ClientTileLinkIO()(p.alterPartial({case TLId => p(OuterTLId)}))
|
||||
def oacq(dummy: Int = 0) = outer.acquire.bits
|
||||
def oprb(dummy: Int = 0) = outer.probe.bits
|
||||
def orel(dummy: Int = 0) = outer.release.bits
|
||||
def ognt(dummy: Int = 0) = outer.grant.bits
|
||||
}
|
||||
|
||||
class ManagerTLIO(implicit p: Parameters) extends CoherenceAgentBundle()(p)
|
||||
with HasInnerTLIO
|
||||
with HasUncachedOuterTLIO
|
||||
|
||||
abstract class CoherenceAgent(implicit p: Parameters) extends CoherenceAgentModule()(p) {
|
||||
def innerTL: ManagerTileLinkIO
|
||||
def outerTL: ClientTileLinkIO
|
||||
def incoherent: Vec[Bool]
|
||||
}
|
||||
|
||||
abstract class ManagerCoherenceAgent(implicit p: Parameters) extends CoherenceAgent()(p)
|
||||
with HasCoherenceAgentWiringHelpers {
|
||||
val io = new ManagerTLIO
|
||||
def innerTL = io.inner
|
||||
def outerTL = TileLinkIOWrapper(io.outer)(p.alterPartial({case TLId => p(OuterTLId)}))
|
||||
def incoherent = io.incoherent
|
||||
}
|
||||
|
||||
class HierarchicalTLIO(implicit p: Parameters) extends CoherenceAgentBundle()(p)
|
||||
with HasInnerTLIO
|
||||
with HasCachedOuterTLIO
|
||||
|
||||
abstract class HierarchicalCoherenceAgent(implicit p: Parameters) extends CoherenceAgent()(p)
|
||||
with HasCoherenceAgentWiringHelpers {
|
||||
val io = new HierarchicalTLIO
|
||||
def innerTL = io.inner
|
||||
def outerTL = io.outer
|
||||
def incoherent = io.incoherent
|
||||
|
||||
// TODO: Remove this function (and all its calls) when we support probing the L2
|
||||
def disconnectOuterProbeAndFinish() {
|
||||
io.outer.probe.ready := Bool(false)
|
||||
io.outer.finish.valid := Bool(false)
|
||||
assert(!io.outer.probe.valid, "L2 agent got illegal probe")
|
||||
}
|
||||
}
|
||||
204
src/main/scala/uncore/agents/Broadcast.scala
Normal file
204
src/main/scala/uncore/agents/Broadcast.scala
Normal file
@@ -0,0 +1,204 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.agents
|
||||
|
||||
import Chisel._
|
||||
import uncore.coherence._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import uncore.util._
|
||||
import cde.Parameters
|
||||
|
||||
class L2BroadcastHub(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) {
|
||||
|
||||
// Create TSHRs for outstanding transactions
|
||||
val irelTrackerList =
|
||||
(0 until nReleaseTransactors).map(id =>
|
||||
Module(new BufferedBroadcastVoluntaryReleaseTracker(id)))
|
||||
val iacqTrackerList =
|
||||
(nReleaseTransactors until nTransactors).map(id =>
|
||||
Module(new BufferedBroadcastAcquireTracker(id)))
|
||||
val trackerList = irelTrackerList ++ iacqTrackerList
|
||||
|
||||
// Propagate incoherence flags
|
||||
trackerList.map(_.io.incoherent) foreach { _ := io.incoherent }
|
||||
|
||||
// Create an arbiter for the one memory port
|
||||
val outerList = trackerList.map(_.io.outer)
|
||||
val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size)
|
||||
(p.alterPartial({ case TLId => p(OuterTLId) })))
|
||||
outer_arb.io.in <> outerList
|
||||
io.outer <> outer_arb.io.out
|
||||
|
||||
// Handle acquire transaction initiation
|
||||
val irel_vs_iacq_conflict =
|
||||
io.inner.acquire.valid &&
|
||||
io.inner.release.valid &&
|
||||
io.irel().conflicts(io.iacq())
|
||||
|
||||
doInputRoutingWithAllocation(
|
||||
in = io.inner.acquire,
|
||||
outs = trackerList.map(_.io.inner.acquire),
|
||||
allocs = trackerList.map(_.io.alloc.iacq),
|
||||
allocOverride = Some(!irel_vs_iacq_conflict))
|
||||
|
||||
// Handle releases, which might be voluntary and might have data
|
||||
doInputRoutingWithAllocation(
|
||||
in = io.inner.release,
|
||||
outs = trackerList.map(_.io.inner.release),
|
||||
allocs = trackerList.map(_.io.alloc.irel))
|
||||
|
||||
// Wire probe requests and grant reply to clients, finish acks from clients
|
||||
doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe))
|
||||
|
||||
doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant))
|
||||
|
||||
doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
|
||||
|
||||
disconnectOuterProbeAndFinish()
|
||||
}
|
||||
|
||||
class BroadcastXactTracker(implicit p: Parameters) extends XactTracker()(p) {
|
||||
val io = new HierarchicalXactTrackerIO
|
||||
pinAllReadyValidLow(io)
|
||||
}
|
||||
|
||||
trait BroadcastsToAllClients extends HasCoherenceAgentParameters {
|
||||
val coh = HierarchicalMetadata.onReset
|
||||
val inner_coh = coh.inner
|
||||
val outer_coh = coh.outer
|
||||
def full_representation = ~UInt(0, width = innerNCachingClients)
|
||||
}
|
||||
|
||||
abstract class BroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
|
||||
extends VoluntaryReleaseTracker(trackerId)(p)
|
||||
with EmitsVoluntaryReleases
|
||||
with BroadcastsToAllClients {
|
||||
val io = new HierarchicalXactTrackerIO
|
||||
pinAllReadyValidLow(io)
|
||||
|
||||
// Checks for illegal behavior
|
||||
assert(!(state === s_idle && io.inner.release.fire() && io.alloc.irel.should && !io.irel().isVoluntary()),
|
||||
"VoluntaryReleaseTracker accepted Release that wasn't voluntary!")
|
||||
}
|
||||
|
||||
abstract class BroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters)
|
||||
extends AcquireTracker(trackerId)(p)
|
||||
with EmitsVoluntaryReleases
|
||||
with BroadcastsToAllClients {
|
||||
val io = new HierarchicalXactTrackerIO
|
||||
pinAllReadyValidLow(io)
|
||||
|
||||
val alwaysWriteFullBeat = false
|
||||
val nSecondaryMisses = 1
|
||||
def iacq_can_merge = Bool(false)
|
||||
|
||||
// Checks for illegal behavior
|
||||
// TODO: this could be allowed, but is a useful check against allocation gone wild
|
||||
assert(!(state === s_idle && io.inner.acquire.fire() && io.alloc.iacq.should &&
|
||||
io.iacq().hasMultibeatData() && !io.iacq().first()),
|
||||
"AcquireTracker initialized with a tail data beat.")
|
||||
|
||||
assert(!(state =/= s_idle && pending_ignt && xact_iacq.isPrefetch()),
|
||||
"Broadcast Hub does not support Prefetches.")
|
||||
|
||||
assert(!(state =/= s_idle && pending_ignt && xact_iacq.isAtomic()),
|
||||
"Broadcast Hub does not support PutAtomics.")
|
||||
}
|
||||
|
||||
class BufferedBroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
|
||||
extends BroadcastVoluntaryReleaseTracker(trackerId)(p)
|
||||
with HasDataBuffer {
|
||||
|
||||
// Tell the parent if any incoming messages conflict with the ongoing transaction
|
||||
routeInParent(irelCanAlloc = Bool(true))
|
||||
|
||||
// Start transaction by accepting inner release
|
||||
innerRelease(block_vol_ignt = pending_orel || vol_ognt_counter.pending)
|
||||
|
||||
// A release beat can be accepted if we are idle, if its a mergeable transaction, or if its a tail beat
|
||||
io.inner.release.ready := state === s_idle || irel_can_merge || irel_same_xact
|
||||
|
||||
when(io.inner.release.fire()) { data_buffer(io.irel().addr_beat) := io.irel().data }
|
||||
|
||||
// Dispatch outer release
|
||||
outerRelease(
|
||||
coh = outer_coh.onHit(M_XWR),
|
||||
data = data_buffer(vol_ognt_counter.up.idx),
|
||||
add_pending_send_bit = irel_is_allocating)
|
||||
|
||||
quiesce() {}
|
||||
}
|
||||
|
||||
class BufferedBroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters)
|
||||
extends BroadcastAcquireTracker(trackerId)(p)
|
||||
with HasByteWriteMaskBuffer {
|
||||
|
||||
// Setup IOs used for routing in the parent
|
||||
routeInParent(iacqCanAlloc = Bool(true))
|
||||
|
||||
// First, take care of accpeting new acquires or secondary misses
|
||||
// Handling of primary and secondary misses' data and write mask merging
|
||||
innerAcquire(
|
||||
can_alloc = Bool(false),
|
||||
next = s_inner_probe)
|
||||
|
||||
io.inner.acquire.ready := state === s_idle || iacq_can_merge || iacq_same_xact_multibeat
|
||||
|
||||
// Track which clients yet need to be probed and make Probe message
|
||||
// If a writeback occurs, we can forward its data via the buffer,
|
||||
// and skip having to go outwards
|
||||
val skip_outer_acquire = pending_ignt_data.andR
|
||||
|
||||
innerProbe(
|
||||
inner_coh.makeProbe(curr_probe_dst, xact_iacq, xact_addr_block),
|
||||
Mux(!skip_outer_acquire, s_outer_acquire, s_busy))
|
||||
|
||||
// Handle incoming releases from clients, which may reduce sharer counts
|
||||
// and/or write back dirty data, and may be unexpected voluntary releases
|
||||
def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
|
||||
io.irel().isVoluntary() &&
|
||||
!state.isOneOf(s_idle, s_meta_write) &&
|
||||
!all_pending_done &&
|
||||
!io.outer.grant.fire() &&
|
||||
!io.inner.grant.fire() &&
|
||||
!vol_ignt_counter.pending &&
|
||||
!blockInnerRelease()
|
||||
|
||||
innerRelease(block_vol_ignt = vol_ognt_counter.pending)
|
||||
|
||||
//TODO: accept vol irels when state === s_idle, operate like the VolRelTracker
|
||||
io.inner.release.ready := irel_can_merge || irel_same_xact
|
||||
|
||||
mergeDataInner(io.inner.release)
|
||||
|
||||
// If there was a writeback, forward it outwards
|
||||
outerRelease(
|
||||
coh = outer_coh.onHit(M_XWR),
|
||||
data = data_buffer(vol_ognt_counter.up.idx))
|
||||
|
||||
// Send outer request for miss
|
||||
outerAcquire(
|
||||
caching = !xact_iacq.isBuiltInType(),
|
||||
coh = outer_coh,
|
||||
data = data_buffer(ognt_counter.up.idx),
|
||||
wmask = wmask_buffer(ognt_counter.up.idx),
|
||||
next = s_busy)
|
||||
|
||||
// Handle the response from outer memory
|
||||
mergeDataOuter(io.outer.grant)
|
||||
|
||||
// Acknowledge or respond with data
|
||||
innerGrant(
|
||||
data = data_buffer(ignt_data_idx),
|
||||
external_pending = pending_orel || ognt_counter.pending || vol_ognt_counter.pending)
|
||||
|
||||
when(iacq_is_allocating) {
|
||||
initializeProbes()
|
||||
}
|
||||
|
||||
initDataInner(io.inner.acquire, iacq_is_allocating || iacq_is_merging)
|
||||
|
||||
// Wait for everything to quiesce
|
||||
quiesce() { clearWmaskBuffer() }
|
||||
}
|
||||
162
src/main/scala/uncore/agents/Bufferless.scala
Normal file
162
src/main/scala/uncore/agents/Bufferless.scala
Normal file
@@ -0,0 +1,162 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.agents
|
||||
|
||||
import Chisel._
|
||||
import uncore.coherence._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import cde.Parameters
|
||||
|
||||
|
||||
class BufferlessBroadcastHub(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) {
|
||||
|
||||
// Create TSHRs for outstanding transactions
|
||||
val irelTrackerList =
|
||||
(0 until nReleaseTransactors).map(id =>
|
||||
Module(new BufferlessBroadcastVoluntaryReleaseTracker(id)))
|
||||
val iacqTrackerList =
|
||||
(nReleaseTransactors until nTransactors).map(id =>
|
||||
Module(new BufferlessBroadcastAcquireTracker(id)))
|
||||
val trackerList = irelTrackerList ++ iacqTrackerList
|
||||
|
||||
// Propagate incoherence flags
|
||||
trackerList.map(_.io.incoherent) foreach { _ := io.incoherent }
|
||||
|
||||
// Create an arbiter for the one memory port
|
||||
val outerList = trackerList.map(_.io.outer)
|
||||
val outer_arb = Module(new ClientTileLinkIOArbiter(outerList.size)
|
||||
(p.alterPartial({ case TLId => p(OuterTLId) })))
|
||||
outer_arb.io.in <> outerList
|
||||
io.outer <> outer_arb.io.out
|
||||
|
||||
val iacq = Queue(io.inner.acquire, 1, pipe=true)
|
||||
val irel = Queue(io.inner.release, 1, pipe=true)
|
||||
|
||||
// Handle acquire transaction initiation
|
||||
val irel_vs_iacq_conflict =
|
||||
iacq.valid &&
|
||||
irel.valid &&
|
||||
irel.bits.conflicts(iacq.bits)
|
||||
|
||||
doInputRoutingWithAllocation(
|
||||
in = iacq,
|
||||
outs = trackerList.map(_.io.inner.acquire),
|
||||
allocs = trackerList.map(_.io.alloc.iacq),
|
||||
allocOverride = Some(!irel_vs_iacq_conflict))
|
||||
io.outer.acquire.bits.data := iacq.bits.data
|
||||
when (io.oacq().hasData()) {
|
||||
io.outer.acquire.bits.addr_beat := iacq.bits.addr_beat
|
||||
}
|
||||
|
||||
// Handle releases, which might be voluntary and might have data
|
||||
doInputRoutingWithAllocation(
|
||||
in = irel,
|
||||
outs = trackerList.map(_.io.inner.release),
|
||||
allocs = trackerList.map(_.io.alloc.irel))
|
||||
io.outer.release.bits.data := irel.bits.data
|
||||
when (io.orel().hasData()) {
|
||||
io.outer.release.bits.addr_beat := irel.bits.addr_beat
|
||||
}
|
||||
|
||||
// Wire probe requests and grant reply to clients, finish acks from clients
|
||||
doOutputArbitration(io.inner.probe, trackerList.map(_.io.inner.probe))
|
||||
|
||||
doOutputArbitration(io.inner.grant, trackerList.map(_.io.inner.grant))
|
||||
io.inner.grant.bits.data := io.outer.grant.bits.data
|
||||
io.inner.grant.bits.addr_beat := io.outer.grant.bits.addr_beat
|
||||
|
||||
doInputRouting(io.inner.finish, trackerList.map(_.io.inner.finish))
|
||||
|
||||
disconnectOuterProbeAndFinish()
|
||||
}
|
||||
|
||||
class BufferlessBroadcastVoluntaryReleaseTracker(trackerId: Int)(implicit p: Parameters)
|
||||
extends BroadcastVoluntaryReleaseTracker(trackerId)(p) {
|
||||
|
||||
// Tell the parent if any incoming messages conflict with the ongoing transaction
|
||||
routeInParent(irelCanAlloc = Bool(true))
|
||||
|
||||
// Start transaction by accepting inner release
|
||||
innerRelease(block_vol_ignt = pending_orel || vol_ognt_counter.pending)
|
||||
|
||||
// A release beat can be accepted if we are idle, if its a mergeable transaction, or if its a tail beat
|
||||
// and if the outer relase path is clear
|
||||
io.inner.release.ready := Mux(io.irel().hasData(),
|
||||
(state =/= s_idle) && (irel_can_merge || irel_same_xact) && io.outer.release.ready,
|
||||
(state === s_idle) || irel_can_merge || irel_same_xact)
|
||||
|
||||
// Dispatch outer release
|
||||
outerRelease(coh = outer_coh.onHit(M_XWR), buffering = Bool(false))
|
||||
|
||||
quiesce() {}
|
||||
}
|
||||
|
||||
class BufferlessBroadcastAcquireTracker(trackerId: Int)(implicit p: Parameters)
|
||||
extends BroadcastAcquireTracker(trackerId)(p) {
|
||||
|
||||
// Setup IOs used for routing in the parent
|
||||
routeInParent(iacqCanAlloc = Bool(true))
|
||||
|
||||
// First, take care of accpeting new acquires or secondary misses
|
||||
// Handling of primary and secondary misses' data and write mask merging
|
||||
innerAcquire(
|
||||
can_alloc = Bool(false),
|
||||
next = s_inner_probe)
|
||||
|
||||
// We are never going to merge anything in the bufferless hub
|
||||
// Therefore, we only need to concern ourselves with the allocated
|
||||
// transaction and (in case of PutBlock) subsequent tail beats
|
||||
val iacq_can_forward = iacq_same_xact && !vol_ognt_counter.pending
|
||||
io.inner.acquire.ready := Mux(io.iacq().hasData(),
|
||||
state === s_outer_acquire && iacq_can_forward && io.outer.acquire.ready,
|
||||
state === s_idle && io.alloc.iacq.should)
|
||||
|
||||
// Track which clients yet need to be probed and make Probe message
|
||||
innerProbe(
|
||||
inner_coh.makeProbe(curr_probe_dst, xact_iacq, xact_addr_block),
|
||||
s_outer_acquire)
|
||||
|
||||
// Handle incoming releases from clients, which may reduce sharer counts
|
||||
// and/or write back dirty data, and may be unexpected voluntary releases
|
||||
def irel_can_merge = io.irel().conflicts(xact_addr_block) &&
|
||||
io.irel().isVoluntary() &&
|
||||
!vol_ignt_counter.pending &&
|
||||
!(io.irel().hasData() && ognt_counter.pending) &&
|
||||
(state =/= s_idle)
|
||||
|
||||
innerRelease(block_vol_ignt = vol_ognt_counter.pending)
|
||||
|
||||
val irel_could_accept = irel_can_merge || irel_same_xact
|
||||
io.inner.release.ready := irel_could_accept &&
|
||||
(!io.irel().hasData() || io.outer.release.ready)
|
||||
|
||||
// If there was a writeback, forward it outwards
|
||||
outerRelease(
|
||||
coh = outer_coh.onHit(M_XWR),
|
||||
buffering = Bool(false),
|
||||
block_orel = !irel_could_accept)
|
||||
|
||||
// Send outer request for miss
|
||||
outerAcquire(
|
||||
caching = !xact_iacq.isBuiltInType(),
|
||||
block_outer_acquire = vol_ognt_counter.pending,
|
||||
buffering = Bool(false),
|
||||
coh = outer_coh,
|
||||
next = s_busy)
|
||||
|
||||
// Handle the response from outer memory
|
||||
when (ognt_counter.pending && io.ognt().hasData()) {
|
||||
io.outer.grant.ready := io.inner.grant.ready // bypass data
|
||||
}
|
||||
|
||||
// Acknowledge or respond with data
|
||||
innerGrant(
|
||||
external_pending = pending_orel || vol_ognt_counter.pending,
|
||||
buffering = Bool(false))
|
||||
|
||||
when(iacq_is_allocating) { initializeProbes() }
|
||||
|
||||
// Wait for everything to quiesce
|
||||
quiesce() {}
|
||||
}
|
||||
1145
src/main/scala/uncore/agents/Cache.scala
Normal file
1145
src/main/scala/uncore/agents/Cache.scala
Normal file
File diff suppressed because it is too large
Load Diff
146
src/main/scala/uncore/agents/Ecc.scala
Normal file
146
src/main/scala/uncore/agents/Ecc.scala
Normal file
@@ -0,0 +1,146 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.agents
|
||||
|
||||
import Chisel._
|
||||
import uncore.util._
|
||||
|
||||
abstract class Decoding
|
||||
{
|
||||
def uncorrected: UInt
|
||||
def corrected: UInt
|
||||
def correctable: Bool
|
||||
def uncorrectable: Bool
|
||||
def error = correctable || uncorrectable
|
||||
}
|
||||
|
||||
abstract class Code
|
||||
{
|
||||
def width(w0: Int): Int
|
||||
def encode(x: UInt): UInt
|
||||
def decode(x: UInt): Decoding
|
||||
}
|
||||
|
||||
/** Degenerate [[Code]] that adds no redundancy: the codeword is the payload
  * itself, so no error can ever be detected or corrected.
  */
class IdentityCode extends Code
{
  def width(w0: Int): Int = w0
  def encode(x: UInt): UInt = x
  def decode(y: UInt): Decoding = {
    val passthrough = y
    new Decoding {
      def uncorrected = passthrough
      def corrected = passthrough
      // With zero redundancy there is nothing to detect.
      def correctable = Bool(false)
      def uncorrectable = Bool(false)
    }
  }
}
|
||||
|
||||
/** Even-parity [[Code]]: prepends one parity bit (XOR of all payload bits).
  * Detects any odd number of bit flips but can correct nothing.
  */
class ParityCode extends Code
{
  def width(w0: Int): Int = w0 + 1
  def encode(x: UInt): UInt = Cat(x.xorR, x)
  def decode(y: UInt): Decoding = new Decoding {
    // Payload occupies the low bits; the parity bit is the MSB.
    private val payload = y(y.getWidth - 2, 0)
    def uncorrected = payload
    def corrected = payload
    def correctable = Bool(false)
    // XOR over the whole codeword is nonzero iff parity is violated.
    def uncorrectable = y.xorR
  }
}
|
||||
|
||||
/** Single-Error-Correcting Hamming [[Code]].
  *
  * Parity bits live at power-of-two positions (1-indexed) of the codeword;
  * data bits fill the remaining positions via `mapping`. A nonzero syndrome
  * gives the 1-indexed position of a single flipped bit.
  */
class SECCode extends Code
{
  // Codeword width for a k-bit payload: k data bits plus m parity bits,
  // with one extra parity bit when 2^m cannot cover m+k+1 positions.
  def width(k: Int) = {
    val m = log2Floor(k) + 1
    k + m + (if((1 << m) < m+k+1) 1 else 0)
  }
  def encode(x: UInt) = {
    val k = x.getWidth
    require(k > 0)
    val n = width(k)

    // Build the codeword position by position (1-indexed):
    // power-of-two positions get the XOR of all covered data bits,
    // every other position carries a data bit.
    val y = for (i <- 1 to n) yield {
      if (isPow2(i)) {
        val r = for (j <- 1 to n; if j != i && (j & i) != 0)
          yield x(mapping(j))
        r reduce (_^_)
      } else
        x(mapping(i))
    }
    y.asUInt
  }
  def decode(y: UInt) = new Decoding {
    val n = y.getWidth
    // A valid SEC codeword width is never an exact power of two.
    require(n > 0 && !isPow2(n))

    // Recompute each parity group; the concatenated results form the
    // syndrome, i.e. the 1-indexed position of the flipped bit (0 = clean).
    val p2 = for (i <- 0 until log2Up(n)) yield 1 << i
    val syndrome = (p2 map { i =>
      val r = for (j <- 1 to n; if (j & i) != 0)
        yield y(j-1)
      r reduce (_^_)
    }).asUInt

    // Drop the parity (power-of-two) positions, leaving the payload bits.
    private def swizzle(z: UInt) = (1 to n).filter(i => !isPow2(i)).map(i => z(i-1)).asUInt
    def uncorrected = swizzle(y)
    // Shift to 1-indexed space, flip the bit the syndrome points at, shift back.
    def corrected = swizzle(((y << 1) ^ UIntToOH(syndrome)) >> 1)
    def correctable = syndrome.orR
    // Pure SEC assumes at most one error, so nothing is ever "uncorrectable".
    def uncorrectable = Bool(false)
  }
  // Codeword position (1-indexed) -> payload bit index, skipping parity slots.
  private def mapping(i: Int) = i-1-log2Up(i)
}
|
||||
|
||||
/** Single-Error-Correcting, Double-Error-Detecting [[Code]]:
  * a SEC Hamming code wrapped in one overall parity bit.
  *
  * Classification logic:
  *  - overall parity violated  => odd number of flips, assumed 1 => correctable
  *  - parity clean but nonzero SEC syndrome => two flips => uncorrectable
  */
class SECDEDCode extends Code
{
  private val sec = new SECCode
  private val par = new ParityCode

  def width(k: Int) = sec.width(k)+1
  // Encode with SEC first, then cover the whole SEC codeword with parity.
  def encode(x: UInt) = par.encode(sec.encode(x))
  def decode(x: UInt) = new Decoding {
    // SEC decodes the inner codeword (parity bit stripped off the top);
    // parity is checked over the complete word.
    val secdec = sec.decode(x(x.getWidth-2,0))
    val pardec = par.decode(x)

    def uncorrected = secdec.uncorrected
    def corrected = secdec.corrected
    // Parity mismatch implies an odd error count; treat as one flip,
    // which the inner SEC correction repairs.
    def correctable = pardec.uncorrectable
    // Even error count (parity clean) with a nonzero syndrome implies
    // a double error, which SEC cannot repair.
    def uncorrectable = !pardec.uncorrectable && secdec.correctable
  }
}
|
||||
|
||||
/** Pseudo-random single-bit error injector, used to exercise ECC logic. */
object ErrGen
{
  // generate a 1-bit error with approximate probability 2^-f
  // (a one-hot mask when the LFSR sample lands inside [0, width); the
  // extra f bits make the hit rate ~width/2^(log2Up(width)+f)).
  def apply(width: Int, f: Int): UInt = {
    require(width > 0 && f >= 0 && log2Up(width) + f <= 16)
    UIntToOH(LFSR16()(log2Up(width)+f-1,0))(width-1,0)
  }
  // Flip at most one pseudo-randomly chosen bit of x.
  def apply(x: UInt, f: Int): UInt = x ^ apply(x.getWidth, f)
}
|
||||
|
||||
/** Hardware testbench for [[SECDEDCode]].
  *
  * Sweeps all 2^k payload values while cycling the injected error count
  * through 0/1/2 bits, and exposes every intermediate value as outputs
  * for waveform inspection.
  */
class SECDEDTest extends Module
{
  val code = new SECDEDCode
  val k = 4              // payload width under test
  val n = code.width(k)  // resulting codeword width

  val io = new Bundle {
    val original = Bits(OUTPUT, k)      // payload being encoded this cycle
    val encoded = Bits(OUTPUT, n)       // clean codeword
    val injected = Bits(OUTPUT, n)      // codeword with 0-2 injected bit flips
    val uncorrected = Bits(OUTPUT, k)   // decoded payload, no repair
    val corrected = Bits(OUTPUT, k)     // decoded payload after SEC repair
    val correctable = Bool(OUTPUT)
    val uncorrectable = Bool(OUTPUT)
  }

  // Outer counter walks all payload values; each wrap advances the
  // injected-error count (0, 1, or 2 flips).
  val c = Counter(Bool(true), 1 << k)
  val numErrors = Counter(c._2, 3)._1
  val e = code.encode(c._1)
  // Each ErrGen term contributes one (pseudo-random) flip when enabled.
  val i = e ^ Mux(numErrors < UInt(1), UInt(0), ErrGen(n, 1)) ^ Mux(numErrors < UInt(2), UInt(0), ErrGen(n, 1))
  val d = code.decode(i)

  io.original := c._1
  io.encoded := e
  io.injected := i
  io.uncorrected := d.uncorrected
  io.corrected := d.corrected
  io.correctable := d.correctable
  io.uncorrectable := d.uncorrectable
}
|
||||
73
src/main/scala/uncore/agents/Mmio.scala
Normal file
73
src/main/scala/uncore/agents/Mmio.scala
Normal file
@@ -0,0 +1,73 @@
|
||||
package uncore.agents
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import cde.Parameters
|
||||
|
||||
/** Per-transaction bookkeeping record for [[MMIOTileLinkManager]]:
  * the inner client_id and client_xact_id that must be restored onto
  * the Grant when the outer response returns.
  */
class MMIOTileLinkManagerData(implicit p: Parameters)
  extends TLBundle()(p)
  with HasClientId
  with HasClientTransactionId
|
||||
|
||||
/** Passes uncached (MMIO) TileLink traffic from an inner interface to an
  * outer one, remapping transaction IDs.
  *
  * Acquire/Grant flow through almost unchanged; the manager allocates an
  * outer client_xact_id per transaction and buffers the inner
  * (client_id, client_xact_id) pair so it can be restored on the Grant.
  */
class MMIOTileLinkManager(implicit p: Parameters)
    extends CoherenceAgentModule()(p) {
  val io = new ManagerTLIO

  // MMIO requests should never need probe or release
  io.inner.probe.valid := Bool(false)
  io.inner.release.ready := Bool(false)

  // Track multibeat Acquires so all beats of one burst share one outer ID.
  val multibeat_fire = io.outer.acquire.fire() && io.oacq().hasMultibeatData()
  val multibeat_start = multibeat_fire && io.oacq().addr_beat === UInt(0)
  val multibeat_end = multibeat_fire && io.oacq().addr_beat === UInt(outerDataBeats - 1)

  // Acquire and Grant are basically passthru,
  // except client_id and client_xact_id need to be converted.
  // Associate the inner client_id and client_xact_id
  // with the outer client_xact_id.
  val xact_pending = Reg(init = UInt(0, maxManagerXacts)) // busy-bit per outer xact id
  val xact_id_sel = PriorityEncoder(~xact_pending)        // lowest free id
  val xact_id_reg = RegEnable(xact_id_sel, multibeat_start) // id latched at burst start
  val xact_multibeat = Reg(init = Bool(false))            // mid-burst flag
  val outer_xact_id = Mux(xact_multibeat, xact_id_reg, xact_id_sel)
  val xact_free = !xact_pending.andR                      // any id available?
  val xact_buffer = Reg(Vec(maxManagerXacts, new MMIOTileLinkManagerData))

  // Accept inner Acquires only while a free outer id exists.
  io.inner.acquire.ready := io.outer.acquire.ready && xact_free
  io.outer.acquire.valid := io.inner.acquire.valid && xact_free
  io.outer.acquire.bits := io.inner.acquire.bits
  io.outer.acquire.bits.client_xact_id := outer_xact_id

  // A single-beat message is trivially on its last beat.
  def isLastBeat[T <: TileLinkChannel with HasTileLinkBeatId](in: T): Bool =
    !in.hasMultibeatData() || in.addr_beat === UInt(outerDataBeats - 1)

  // Mark an outer id busy when the final Acquire beat goes out.
  def addPendingBitOnAcq[T <: AcquireMetadata](in: DecoupledIO[T]): UInt =
    Mux(in.fire() && isLastBeat(in.bits), UIntToOH(in.bits.client_xact_id), UInt(0))

  // Free the id on the last Grant beat, unless the Grant requires a Finish ack.
  def clearPendingBitOnGnt[T <: GrantMetadata](in: DecoupledIO[T]): UInt =
    ~Mux(in.fire() && isLastBeat(in.bits) && !in.bits.requiresAck(),
         UIntToOH(in.bits.manager_xact_id), UInt(0))

  // Free the id when the Finish ack arrives.
  def clearPendingBitOnFin(in: DecoupledIO[Finish]): UInt =
    ~Mux(in.fire(), UIntToOH(in.bits.manager_xact_id), UInt(0))

  xact_pending := (xact_pending | addPendingBitOnAcq(io.outer.acquire)) &
                  clearPendingBitOnFin(io.inner.finish) &
                  clearPendingBitOnGnt(io.inner.grant)

  // Capture the inner identifiers once the transaction is fully launched.
  when (io.outer.acquire.fire() && isLastBeat(io.outer.acquire.bits)) {
    xact_buffer(outer_xact_id) := io.iacq()
  }

  when (multibeat_start) { xact_multibeat := Bool(true) }
  when (multibeat_end) { xact_multibeat := Bool(false) }

  // Grants pass through with the buffered inner ids restored; the outer
  // client_xact_id doubles as the inner manager_xact_id for the Finish ack.
  val gnt_xact = xact_buffer(io.ognt().client_xact_id)
  io.outer.grant.ready := io.inner.grant.ready
  io.inner.grant.valid := io.outer.grant.valid
  io.inner.grant.bits := io.outer.grant.bits
  io.inner.grant.bits.client_id := gnt_xact.client_id
  io.inner.grant.bits.client_xact_id := gnt_xact.client_xact_id
  io.inner.grant.bits.manager_xact_id := io.ognt().client_xact_id
  io.inner.finish.ready := Bool(true)
}
|
||||
69
src/main/scala/uncore/agents/StatelessBridge.scala
Normal file
69
src/main/scala/uncore/agents/StatelessBridge.scala
Normal file
@@ -0,0 +1,69 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.agents
|
||||
|
||||
import Chisel._
|
||||
import uncore.coherence._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import uncore.devices._
|
||||
import cde.{Parameters, Field, Config}
|
||||
|
||||
/** The ManagerToClientStateless Bridge does not maintain any state for the messages
|
||||
* which pass through it. It simply passes the messages back and forth without any
|
||||
* tracking or translation.
|
||||
*
|
||||
* This can reduce area and timing in very constrained situations:
|
||||
* - The Manager and Client implement the same coherence protocol
|
||||
* - There are no probe or finish messages.
|
||||
* - The outer transaction ID is large enough to handle all possible inner
|
||||
* transaction IDs, such that no remapping state must be maintained.
|
||||
*
|
||||
* This bridge DOES NOT keep the uncached channel coherent with the cached
|
||||
* channel. Uncached requests to blocks cached by the L1 will not probe the L1.
|
||||
* As a result, uncached reads to cached blocks will get stale data until
|
||||
* the L1 performs a voluntary writeback, and uncached writes to cached blocks
|
||||
* will get lost, as the voluntary writeback from the L1 will overwrite the
|
||||
* changes. If your tile relies on probing the L1 data cache in order to
|
||||
* share data between the instruction cache and data cache (e.g. you are using
|
||||
* a non-blocking L1 D$) or if the tile has uncached channels capable of
|
||||
* writes (e.g. Hwacha and other RoCC accelerators), DO NOT USE THIS BRIDGE.
|
||||
*/
|
||||
|
||||
class ManagerToClientStatelessBridge(implicit p: Parameters) extends HierarchicalCoherenceAgent()(p) {
  // Bit widths of the inner ids that must be packed into the outer xact id.
  val icid = io.inner.tlClientIdBits
  val ixid = io.inner.tlClientXactIdBits
  val oxid = io.outer.tlClientXactIdBits

  val innerCoh = io.inner.tlCoh.getClass
  val outerCoh = io.outer.tlCoh.getClass

  // Stateless Bridge is only usable in certain constrained situations.
  // Sanity check its usage here.
  require(io.inner.tlNCachingClients <= 1)
  // The outer xact id must be wide enough to hold {client_id, client_xact_id}.
  require(icid + ixid <= oxid)
  require(innerCoh eq outerCoh,
    s"Coherence policies do not match: inner is ${innerCoh.getSimpleName}, outer is ${outerCoh.getSimpleName}")

  // Acquire passthru: encode the inner (id, xact id) pair into the outer xact id.
  io.outer.acquire.valid := io.inner.acquire.valid
  io.inner.acquire.ready := io.outer.acquire.ready
  io.outer.acquire.bits := io.inner.acquire.bits
  io.outer.acquire.bits.client_xact_id := Cat(io.inner.acquire.bits.client_id, io.inner.acquire.bits.client_xact_id)

  // Release passthru, same id packing as Acquire.
  io.outer.release.valid := io.inner.release.valid
  io.inner.release.ready := io.outer.release.ready
  io.outer.release.bits := io.inner.release.bits
  io.outer.release.bits.client_xact_id := Cat(io.inner.release.bits.client_id, io.inner.release.bits.client_xact_id)

  // Grant passthru: unpack the ids back out of the outer xact id.
  io.inner.grant.valid := io.outer.grant.valid
  io.outer.grant.ready := io.inner.grant.ready
  io.inner.grant.bits := io.outer.grant.bits
  io.inner.grant.bits.client_xact_id := io.outer.grant.bits.client_xact_id(ixid-1, 0)
  io.inner.grant.bits.client_id := io.outer.grant.bits.client_xact_id(icid+ixid-1, ixid)

  // No probes are generated and Finish acks are consumed immediately.
  io.inner.probe.valid := Bool(false)
  io.inner.finish.ready := Bool(true)

  disconnectOuterProbeAndFinish()
}
|
||||
119
src/main/scala/uncore/agents/StoreDataQueue.scala
Normal file
119
src/main/scala/uncore/agents/StoreDataQueue.scala
Normal file
@@ -0,0 +1,119 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.agents
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
// Configuration key: depth (in transactions) of the L2 store data queue.
case object L2StoreDataQueueDepth extends Field[Int]
|
||||
|
||||
/** Derived sizing parameters for the store data queue machinery. */
trait HasStoreDataQueueParameters extends HasCoherenceAgentParameters {
  // Queue entries are per-beat, so scale the configured depth by beat count.
  val sdqDepth = p(L2StoreDataQueueDepth)*innerDataBeats
  // Index width must cover both release-transactor slots and SDQ slots.
  val dqIdxBits = math.max(log2Up(nReleaseTransactors) + 1, log2Up(sdqDepth))
  val nDataQueueLocations = 3 //Stores, VoluntaryWBs, Releases
}
|
||||
|
||||
/** Pointer into one of the data queues: which queue (`loc`) and which
  * entry within it (`idx`). Packed into the data field of internal
  * TileLink beats in place of actual data.
  */
class DataQueueLocation(implicit p: Parameters) extends CoherenceAgentBundle()(p)
    with HasStoreDataQueueParameters {
  val idx = UInt(width = dqIdxBits)
  val loc = UInt(width = log2Ceil(nDataQueueLocations))
}
|
||||
|
||||
/** Factory that wires up a [[DataQueueLocation]] from its two fields. */
object DataQueueLocation {
  def apply(idx: UInt, loc: UInt)(implicit p: Parameters) = {
    val loc_wire = Wire(new DataQueueLocation)
    loc_wire.idx := idx
    loc_wire.loc := loc
    loc_wire
  }
}
|
||||
|
||||
/** Mixin providing shared data queues for an L2 agent: a store data queue
  * (SDQ) for Put payloads and a writeback queue (VWBDQ) for voluntary
  * Release payloads. Internal beats carry a [[DataQueueLocation]] pointer
  * instead of the data itself; the real data is read back out when the
  * outer Acquire is sent. Several wiring snippets are retained commented-out
  * as documentation of the intended integration points.
  */
trait HasStoreDataQueue extends HasStoreDataQueueParameters {
  val io: HierarchicalTLIO
  val trackerIOsList: Seq[HierarchicalXactTrackerIO]

  val internalDataBits = new DataQueueLocation().getWidth
  val inStoreQueue :: inVolWBQueue :: inClientReleaseQueue :: Nil = Enum(UInt(), nDataQueueLocations)

  // Parameter view in which internal links carry queue pointers, not data.
  val usingStoreDataQueue = p.alterPartial({
    case TLKey(`innerTLId`) => innerTLParams.copy(overrideDataBitsPerBeat = Some(internalDataBits))
    case TLKey(`outerTLId`) => outerTLParams.copy(overrideDataBitsPerBeat = Some(internalDataBits))
  })

  // Queue to store impending Put data
  lazy val sdq = Reg(Vec(sdqDepth, io.iacq().data))
  lazy val sdq_val = Reg(init=Bits(0, sdqDepth))        // per-entry valid bits
  lazy val sdq_alloc_id = PriorityEncoder(~sdq_val)     // lowest free entry
  lazy val sdq_rdy = !sdq_val.andR                      // any entry free?
  // Enqueue when any tracker accepts (or merges) an Acquire beat with data.
  lazy val sdq_enq = trackerIOsList.map( t =>
                       (t.alloc.iacq.should || t.alloc.iacq.matches) &&
                         t.inner.acquire.fire() &&
                         t.iacq().hasData()
                     ).reduce(_||_)

  lazy val sdqLoc = List.fill(nTransactors) {
    DataQueueLocation(sdq_alloc_id, inStoreQueue).asUInt
  }

  /*
  doInputRoutingWithAllocation(
    in = io.inner.acquire,
    outs = trackerList.map(_.io.inner.acquire),
    allocs = trackerList.map(_.io.alloc._iacq),
    dataOverride = Some(sdqLoc),
    allocOverride = Some(sdq_rdy && !irel_vs_iacq_conflict))
  */

  // Queue to store impending Voluntary Release data
  lazy val voluntary = io.irel().isVoluntary()
  lazy val vwbdq_enq = io.inner.release.fire() && voluntary && io.irel().hasData()
  lazy val (rel_data_cnt, rel_data_done) = Counter(vwbdq_enq, innerDataBeats) //TODO Zero width
  lazy val vwbdq = Reg(Vec(innerDataBeats, io.irel().data)) //TODO Assumes nReleaseTransactors == 1

  // Release-transactor slots point into the writeback queue; others mark
  // the beat as coming straight from a client release.
  lazy val vwbqLoc = (0 until nTransactors).map(i =>
    (DataQueueLocation(rel_data_cnt,
                       (if(i < nReleaseTransactors) inVolWBQueue
                        else inClientReleaseQueue)).asUInt))
  /*
  doInputRoutingWithAllocation(
    io.inner.release,
    trackerList.map(_.io.inner.release),
    trackerList.map(_.io.matches.irel),
    trackerList.map(_.io.alloc.irel),
    Some(vwbqLoc))
  */

  val outer_arb: ClientTileLinkIOArbiter
  // Reinterpret the arbitrated "data" field as the queue pointer it carries.
  lazy val outer_data_ptr = new DataQueueLocation().fromBits(outer_arb.io.out.acquire.bits.data)
  /*
  val outer_arb = Module(new ClientTileLinkIOArbiter(trackerList.size)
                    (usingStoreDataQueue.alterPartial({ case TLId => p(OuterTLId) })))
  outer_arb.io.in <> trackerList
  */
  // Get the pending data out of the store data queue
  lazy val is_in_sdq = outer_data_ptr.loc === inStoreQueue
  // An SDQ entry is freed when its data beat leaves on the outer Acquire.
  lazy val free_sdq = io.outer.acquire.fire() &&
                        io.outer.acquire.bits.hasData() &&
                        outer_data_ptr.loc === inStoreQueue
  /*
  io.outer <> outer_arb.io.out
  io.outer.acquire.bits.data := MuxLookup(outer_data_ptr.loc, io.irel().data, Array(
                                  inStoreQueue -> sdq(outer_data_ptr.idx),
                                  inVolWBQueue -> vwbdq(outer_data_ptr.idx)))
  */

  // Enqueue SDQ data
  def sdqEnqueue() {
    when (sdq_enq) { sdq(sdq_alloc_id) := io.iacq().data }
    when(vwbdq_enq) { vwbdq(rel_data_cnt) := io.irel().data }
  }

  // Update SDQ valid bits
  def sdqUpdate() {
    when (io.outer.acquire.valid || sdq_enq) {
      // Clear the freed entry (if any) and set the newly-allocated one.
      sdq_val := sdq_val & ~(UIntToOH(outer_data_ptr.idx) & Fill(sdqDepth, free_sdq)) |
        PriorityEncoderOH(~sdq_val(sdqDepth-1,0)) & Fill(sdqDepth, sdq_enq)
    }
  }
}
|
||||
654
src/main/scala/uncore/agents/Trackers.scala
Normal file
654
src/main/scala/uncore/agents/Trackers.scala
Normal file
@@ -0,0 +1,654 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.agents
|
||||
|
||||
import Chisel._
|
||||
import uncore.coherence._
|
||||
import uncore.tilelink._
|
||||
import uncore.util._
|
||||
import uncore.util._
|
||||
import junctions._
|
||||
import cde.{Field, Parameters}
|
||||
import scala.math.max
|
||||
|
||||
// Configuration key: when true, trackers printf each release data beat.
case object EnableL2Logging extends Field[Boolean]
|
||||
|
||||
/** Handshake used to route one message class to a tracker:
  * the tracker reports whether it matches / can accept, and the
  * arbiter tells it whether it should allocate.
  */
class TrackerAllocation extends Bundle {
  val matches = Bool(OUTPUT) // tracker already owns this block
  val can = Bool(OUTPUT)     // tracker is idle and able to allocate
  val should = Bool(INPUT)   // arbiter has chosen this tracker to allocate
}
|
||||
|
||||
/** Bundle of per-channel [[TrackerAllocation]] handshakes plus the block
  * address the tracker is currently operating on.
  */
class TrackerAllocationIO(implicit val p: Parameters)
    extends ParameterizedBundle()(p)
    with HasCacheBlockAddress {
  val iacq = new TrackerAllocation // inner Acquire routing
  val irel = new TrackerAllocation // inner Release routing
  val oprb = new TrackerAllocation // outer Probe routing
  val idle = Bool(OUTPUT)          // tracker holds no transaction
  // Address is driven by the tracker (overrides the default INPUT direction).
  override val addr_block = UInt(OUTPUT, tlBlockAddrBits)
}
|
||||
|
||||
/** Adds the allocation handshake bundle to a tracker's IO. */
trait HasTrackerAllocationIO extends Bundle {
  implicit val p: Parameters
  val alloc = new TrackerAllocationIO
}
|
||||
|
||||
/** IO for a tracker with only an inner (manager-side) TileLink port. */
class ManagerXactTrackerIO(implicit p: Parameters) extends ManagerTLIO()(p)
  with HasTrackerAllocationIO
|
||||
|
||||
/** IO for a tracker with both inner and outer TileLink ports. */
class HierarchicalXactTrackerIO(implicit p: Parameters) extends HierarchicalTLIO()(p)
  with HasTrackerAllocationIO
|
||||
|
||||
/** Base class for transaction trackers: provides the canonical state
  * machine encoding plus helpers for quiescing and defaulting IO.
  */
abstract class XactTracker(implicit p: Parameters) extends CoherenceAgentModule()(p)
    with HasXactTrackerStates
    with HasPendingBitHelpers {
  override val s_idle :: s_meta_read :: s_meta_resp :: s_wb_req :: s_wb_resp :: s_inner_probe :: s_outer_acquire :: s_busy :: s_meta_write :: Nil = Enum(UInt(), 9)
  val state = Reg(init=s_idle)

  // Leave s_busy for `next` once every scoreboarded pending bit has cleared,
  // running `restore` (caller-supplied cleanup) in the same cycle.
  def quiesce(next: UInt = s_idle)(restore: => Unit) {
    all_pending_done := !scoreboard.foldLeft(Bool(false))(_||_)
    when(state === s_busy && all_pending_done) {
      state := next
      restore
    }
  }

  // Recursively drive every outgoing ready/valid in a bundle low, so
  // subclasses only have to assert the signals they actually use.
  def pinAllReadyValidLow[T <: Data](b: Bundle) {
    b.elements.foreach {
      _._2 match {
        case d: DecoupledIO[_] =>
          if(d.ready.dir == OUTPUT) d.ready := Bool(false)
          else if(d.valid.dir == OUTPUT) d.valid := Bool(false)
        case v: ValidIO[_] => if(v.valid.dir == OUTPUT) v.valid := Bool(false)
        case b: Bundle => pinAllReadyValidLow(b)
        case _ =>
      }
    }
  }
}
|
||||
|
||||
/** Abstract names for the tracker state machine's states, so mixin traits
  * can reference them without depending on a concrete encoding.
  */
trait HasXactTrackerStates {
  def state: UInt
  def s_idle: UInt = UInt(0) // only state with a fixed encoding
  def s_meta_read: UInt
  def s_meta_resp: UInt
  def s_wb_req: UInt
  def s_wb_resp: UInt
  def s_inner_probe: UInt
  def s_outer_acquire: UInt
  def s_busy: UInt
  def s_meta_write: UInt
}
|
||||
|
||||
/** Helpers for manipulating per-beat / per-client "pending" bitvectors.
  *
  * `add*` helpers return a mask to OR into a pending vector;
  * `drop*` helpers return a mask to AND into it. The `scoreboard`
  * collects the pending conditions consulted by `XactTracker.quiesce`.
  */
trait HasPendingBitHelpers extends HasDataBeatCounters {
  val scoreboard = scala.collection.mutable.ListBuffer.empty[Bool]
  val all_pending_done = Wire(Bool())

  // Set the bit for this message's beat when `inc` holds.
  def addPendingBitWhenBeat[T <: HasBeat](inc: Bool, in: T): UInt =
    Fill(in.tlDataBeats, inc) & UIntToOH(in.addr_beat)

  // Clear the bit for this message's beat when `dec` holds.
  def dropPendingBitWhenBeat[T <: HasBeat](dec: Bool, in: T): UInt =
    ~Fill(in.tlDataBeats, dec) | ~UIntToOH(in.addr_beat)

  // Set the bit for this message's client when `inc` holds.
  def addPendingBitWhenId[T <: HasClientId](inc: Bool, in: T): UInt =
    Fill(in.tlNCachingClients, inc) & UIntToOH(in.client_id)

  // Clear the bit for this message's client when `dec` holds.
  def dropPendingBitWhenId[T <: HasClientId](dec: Bool, in: T): UInt =
    ~Fill(in.tlNCachingClients, dec) | ~UIntToOH(in.client_id)

  def addPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T], inc: Bool = Bool(true)): UInt =
    addPendingBitWhenBeat(in.fire() && in.bits.hasData() && inc, in.bits)

  def addPendingBitWhenBeatHasDataAndAllocs(
      in: DecoupledIO[AcquireFromSrc],
      alloc_override: Bool = Bool(false)): UInt =
    addPendingBitWhenBeatHasData(in, in.bits.allocate() || alloc_override)

  // Gets, atomics, and partial-mask Puts all require a read of old data.
  def addPendingBitWhenBeatNeedsRead(in: DecoupledIO[AcquireFromSrc],
      always: Bool = Bool(true), unless: Bool = Bool(false)): UInt = {
    val a = in.bits
    val needs_read = !unless && (a.isGet() || a.isAtomic() || a.hasPartialWritemask()) || always
    addPendingBitWhenBeat(in.fire() && needs_read, a)
  }

  def addPendingBitWhenBeatHasPartialWritemask(in: DecoupledIO[AcquireFromSrc]): UInt =
    addPendingBitWhenBeat(in.fire() && in.bits.hasPartialWritemask(), in.bits)

  // Multibeat acquires touch every beat; single-beat ones only their own.
  def addPendingBitsFromAcquire(a: SecondaryMissInfo): UInt =
    Mux(a.hasMultibeatData(), Fill(a.tlDataBeats, UInt(1, 1)), UIntToOH(a.addr_beat))

  def dropPendingBitWhenBeatHasData[T <: HasBeat](in: DecoupledIO[T]): UInt =
    dropPendingBitWhenBeat(in.fire() && in.bits.hasData(), in.bits)

  def dropPendingBitAtDest[T <: HasId](in: DecoupledIO[T]): UInt =
    dropPendingBitWhenId(in.fire(), in.bits)

  def dropPendingBitAtDestWhenVoluntary[T <: HasId with MightBeVoluntary](in: DecoupledIO[T]): UInt =
    dropPendingBitWhenId(in.fire() && in.bits.isVoluntary(), in.bits)

  def addPendingBitAtSrc[T <: HasId](in: DecoupledIO[T]): UInt =
    addPendingBitWhenId(in.fire(), in.bits)

  def addPendingBitAtSrcWhenVoluntary[T <: HasId with MightBeVoluntary](in: DecoupledIO[T]): UInt =
    addPendingBitWhenId(in.fire() && in.bits.isVoluntary(), in.bits)

  // All-ones except bit zero, gated on `en` (i.e. "all the other beats").
  def addOtherBits(en: Bool, nBits: Int): UInt =
    Mux(en, Cat(Fill(nBits - 1, UInt(1, 1)), UInt(0, 1)), UInt(0, nBits))

  // On the first beat of a multibeat Acquire, mark the remaining beats pending.
  def addPendingBitsOnFirstBeat(in: DecoupledIO[Acquire]): UInt =
    addOtherBits(in.fire() &&
                 in.bits.hasMultibeatData() &&
                 in.bits.addr_beat === UInt(0),
                 in.bits.tlDataBeats)

  def dropPendingBitsOnFirstBeat(in: DecoupledIO[Acquire]): UInt =
    ~addPendingBitsOnFirstBeat(in)
}
|
||||
|
||||
/** Per-tracker buffer holding one cache block's worth of data beats. */
trait HasDataBuffer extends HasCoherenceAgentParameters {
  val data_buffer = Reg(init=Vec.fill(innerDataBeats)(UInt(0, width = innerDataBits)))

  type TLDataBundle = TLBundle with HasTileLinkData with HasTileLinkBeatId

  // Capture incoming Acquire data beats into the buffer while allocating.
  def initDataInner[T <: Acquire](in: DecoupledIO[T], alloc: Bool) {
    when(in.fire() && in.bits.hasData() && alloc) {
      data_buffer(in.bits.addr_beat) := in.bits.data
    }
  }

  // TODO: provide func for accessing when innerDataBeats =/= outerDataBeats or internalDataBeats
  // Default merge policy: the incoming beat simply overwrites the buffer
  // (subclasses refine this, e.g. with write masks).
  def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
    data_buffer(beat) := incoming
  }

  def mergeDataInner[T <: TLDataBundle](in: DecoupledIO[T]) {
    when(in.fire() && in.bits.hasData()) {
      mergeData(innerDataBits)(in.bits.addr_beat, in.bits.data)
    }
  }

  def mergeDataOuter[T <: TLDataBundle](in: DecoupledIO[T]) {
    when(in.fire() && in.bits.hasData()) {
      mergeData(outerDataBits)(in.bits.addr_beat, in.bits.data)
    }
  }
}
|
||||
|
||||
/** Extends [[HasDataBuffer]] with per-byte write masks so partial Puts
  * can be merged with refill/writeback data byte-by-byte.
  */
trait HasByteWriteMaskBuffer extends HasDataBuffer {
  val wmask_buffer = Reg(init=Vec.fill(innerDataBeats)(UInt(0, width = innerWriteMaskBits)))
  // A beat is fully valid once every byte of it has been written.
  val data_valid = Vec(wmask_buffer.map(wmask => wmask.andR))

  override def initDataInner[T <: Acquire](in: DecoupledIO[T], alloc: Bool) {
    when(in.fire() && in.bits.hasData() && alloc) {
      val beat = in.bits.addr_beat
      // Expand the byte mask to a bit mask and splice in only masked bytes.
      val full = FillInterleaved(8, in.bits.wmask())
      data_buffer(beat) := (~full & data_buffer(beat)) | (full & in.bits.data)
      wmask_buffer(beat) := in.bits.wmask() | wmask_buffer(beat) // assumes wmask_buffer is zeroed
    }
  }

  override def mergeData(dataBits: Int)(beat: UInt, incoming: UInt) {
    val old_data = incoming          // Refilled, written back, or de-cached data
    val new_data = data_buffer(beat) // Newly Put data is already in the buffer
    val wmask = FillInterleaved(8, wmask_buffer(beat))
    // Buffered Put bytes win over the incoming (older) data.
    data_buffer(beat) := (~wmask & old_data) | (wmask & new_data)
    // After merging, the whole beat in the buffer is authoritative.
    wmask_buffer(beat) := ~UInt(0, innerWriteMaskBits)
  }

  def clearWmaskBuffer() {
    wmask_buffer.foreach { w => w := UInt(0) }
  }
}
|
||||
|
||||
/** Register holding the block address of the transaction in flight. */
trait HasBlockAddressBuffer extends HasCoherenceAgentParameters {
  val xact_addr_block = Reg(init = UInt(0, width = blockAddrBits))
}
|
||||
|
||||
|
||||
/** Registers latching the metadata of the Acquire being serviced. */
trait HasAcquireMetadataBuffer extends HasBlockAddressBuffer {
  val xact_allocate = Reg{ Bool() }          // should the block be cached?
  val xact_amo_shift_bytes = Reg{ UInt() }   // byte offset for AMO operand
  val xact_op_code = Reg{ UInt() }           // memory op code
  val xact_addr_byte = Reg{ UInt() }         // byte address within the beat
  val xact_op_size = Reg{ UInt() }           // operand size
  val xact_addr_beat = Wire(UInt())          // current beat (combinational)
  val xact_iacq = Wire(new SecondaryMissInfo) // reconstructed Acquire view
}
|
||||
|
||||
/** Registers latching the identity of a voluntary inner Release, plus a
  * helper to reconstruct the Release message for acknowledgement.
  */
trait HasVoluntaryReleaseMetadataBuffer extends HasBlockAddressBuffer
    with HasPendingBitHelpers
    with HasXactTrackerStates {
  def io: HierarchicalXactTrackerIO

  val xact_vol_ir_r_type = Reg{ UInt() }
  val xact_vol_ir_src = Reg{ UInt() }
  val xact_vol_ir_client_xact_id = Reg{ UInt() }

  // Rebuild the buffered voluntary Release (typed against the inner link).
  def xact_vol_irel = Release(
                        src = xact_vol_ir_src,
                        voluntary = Bool(true),
                        r_type = xact_vol_ir_r_type,
                        client_xact_id = xact_vol_ir_client_xact_id,
                        addr_block = xact_addr_block)
                        (p.alterPartial({ case TLId => p(InnerTLId) }))
}
|
||||
|
||||
/** Tracker mixin that accepts voluntary inner Releases (writebacks),
  * collects their data beats, and acknowledges them with a Grant.
  */
trait AcceptsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer {
  def inner_coh: ManagerMetadata

  // Beats of release data still expected before the ack can be sent.
  val pending_irel_data = Reg(init=Bits(0, width = innerDataBeats))
  val vol_ignt_counter = Wire(new TwoWayBeatCounterStatus)

  def irel_can_merge: Bool  // subclass policy: merge an unrelated release?
  def irel_same_xact: Bool  // release belongs to this tracker's transaction
  def irel_is_allocating: Bool = state === s_idle && io.alloc.irel.should && io.inner.release.valid
  def irel_is_merging: Bool = (irel_can_merge || irel_same_xact) && io.inner.release.valid

  def innerRelease(block_vol_ignt: Bool = Bool(false), next: UInt = s_busy) {
    // Count voluntary releases in vs. acks out to know when one is owed.
    connectTwoWayBeatCounters(
      status = vol_ignt_counter,
      up = io.inner.release,
      down = io.inner.grant,
      trackUp = (r: Release) => {
        Mux(state === s_idle, io.alloc.irel.should, io.alloc.irel.matches) && r.isVoluntary() && r.requiresAck()
      },
      trackDown = (g: Grant) => (state =/= s_idle) && g.isVoluntary())

    when(irel_is_allocating) {
      xact_addr_block := io.irel().addr_block
      // Set all of them to pending in the beginning as a precaution
      // If it turns out we don't need some or all of the beats, they will
      // be overridden below
      pending_irel_data := ~UInt(0, innerDataBeats)
      state := next
    }

    val irel_fire = (irel_is_allocating || irel_is_merging) && io.inner.release.ready
    when (irel_fire) {
      when (io.irel().first()) {
        when (io.irel().isVoluntary()) {
          // Latch the identity needed to build the Grant ack later.
          xact_vol_ir_r_type := io.irel().r_type
          xact_vol_ir_src := io.irel().client_id
          xact_vol_ir_client_xact_id := io.irel().client_xact_id
        }
        // If this release has data, set all the pending bits except the first.
        // Otherwise, clear all the pending bits
        pending_irel_data := Mux(io.irel().hasMultibeatData(),
                               dropPendingBitWhenBeatHasData(io.inner.release),
                               UInt(0))
      } .otherwise {
        pending_irel_data := (pending_irel_data & dropPendingBitWhenBeatHasData(io.inner.release))
      }
      if (p(EnableL2Logging)) {
        when (io.irel().hasData()) {
          printf("[release] addr_block=%x addr_beat=%d data=%x\n",
            io.irel().addr_block, io.irel().addr_beat, io.irel().data)
        }
      }
    }

    // Ack once all data has arrived, unless the subclass blocks it.
    io.inner.grant.valid := state.isOneOf(s_wb_req, s_wb_resp, s_inner_probe, s_busy) &&
                              vol_ignt_counter.pending &&
                              !(pending_irel_data.orR || block_vol_ignt)

    io.inner.grant.bits := inner_coh.makeGrant(xact_vol_irel)

    scoreboard += (pending_irel_data.orR, vol_ignt_counter.pending)
  }

}
|
||||
|
||||
/** Tracker mixin that emits voluntary outer Releases (writebacks),
  * either buffered in the tracker or forwarded combinationally from the
  * inner release channel.
  */
trait EmitsVoluntaryReleases extends HasVoluntaryReleaseMetadataBuffer {
  val pending_orel_send = Reg(init=Bool(false))                    // dataless release owed
  val pending_orel_data = Reg(init=Bits(0, width = innerDataBeats)) // data beats still to send
  val vol_ognt_counter = Wire(new TwoWayBeatCounterStatus)
  val pending_orel = pending_orel_send || pending_orel_data.orR || vol_ognt_counter.pending
  val sending_orel = Reg(init = Bool(false))                       // mid-burst on outer release

  // Block acceptance of inner releases if we have already started sending
  // outer releases, but have not yet sent out the beat corresponding to the
  // inner release. This function must be included in io.inner.release.ready
  // if it is possible to start accepting a new inner release as the previous
  // outer release is still being sent. DO NOT include this in the
  // io.inner.release.ready if the releases are not buffered
  // (i.e. io.inner.release and io.outer.release combinationally linked).
  def blockInnerRelease(rel: ReleaseMetadata = io.irel()): Bool = {
    val waiting_to_send = sending_orel && pending_orel_data(rel.addr_beat)
    val sending_now = io.outer.release.fire() && rel.addr_beat === io.orel().addr_beat
    rel.hasData() && (waiting_to_send || sending_now)
  }

  def outerRelease(
      coh: ClientMetadata,
      buffering: Bool = Bool(true),
      data: UInt = io.irel().data,
      add_pending_data_bits: UInt = UInt(0),
      add_pending_send_bit: Bool = Bool(false),
      block_orel: Bool = Bool(false)) {

    // Accumulate beats arriving on the inner link (plus caller-supplied
    // bits) and retire beats as they leave on the outer link.
    when (state =/= s_idle || io.alloc.irel.should) {
      pending_orel_data := (pending_orel_data |
                             addPendingBitWhenBeatHasData(io.inner.release) |
                             add_pending_data_bits) &
                           dropPendingBitWhenBeatHasData(io.outer.release)
    }
    when (add_pending_send_bit) { pending_orel_send := Bool(true) }
    when (io.outer.release.fire()) {
      when (io.outer.release.bits.first()) { sending_orel := Bool(true) }
      when (io.outer.release.bits.last()) { sending_orel := Bool(false) }
      pending_orel_send := Bool(false)
    }

    // Track outstanding voluntary releases awaiting an outer Grant ack.
    connectTwoWayBeatCounters(
      status = vol_ognt_counter,
      up = io.outer.release,
      down = io.outer.grant,
      trackUp = (r: Release) => r.isVoluntary() && r.requiresAck(),
      trackDown = (g: Grant) => g.isVoluntary())

    io.outer.release.valid := !block_orel && Mux(buffering,
      (state === s_busy) && Mux(io.orel().hasData(),
        pending_orel_data(vol_ognt_counter.up.idx),
        pending_orel_send),
      // only writebacks need to be forwarded to the outer interface
      state =/= s_idle && io.alloc.irel.matches &&
        io.irel().hasData() && io.inner.release.valid)

    io.outer.release.bits := coh.makeVoluntaryWriteback(
      client_xact_id = UInt(0), // TODO was tracker id, but not needed?
      addr_block = xact_addr_block,
      addr_beat = vol_ognt_counter.up.idx,
      data = data)

    when (vol_ognt_counter.pending) { io.outer.grant.ready := Bool(true) }

    scoreboard += (pending_orel, vol_ognt_counter.pending)
  }
}
|
||||
|
||||
/** Tracker mixin that probes inner caching clients and waits for their
  * Release responses before leaving s_inner_probe.
  */
trait EmitsInnerProbes extends HasBlockAddressBuffer
    with HasXactTrackerStates
    with HasPendingBitHelpers {
  def io: HierarchicalXactTrackerIO

  // All probe logic is elided at elaboration time when no client caches.
  val needs_probes = (innerNCachingClients > 0)
  val pending_iprbs = Reg(UInt(width = max(innerNCachingClients, 1)))
  val curr_probe_dst = PriorityEncoder(pending_iprbs)

  def full_representation: UInt // sharer bitmap from the concrete tracker
  def initializeProbes() {
    if (needs_probes)
      // Probe only coherent sharers; incoherent ports are skipped.
      pending_iprbs := full_representation & ~io.incoherent.asUInt
    else
      pending_iprbs := UInt(0)
  }
  // A non-voluntary release for our block during probing is the response
  // to one of our own probes.
  def irel_same_xact = io.irel().conflicts(xact_addr_block) &&
                         !io.irel().isVoluntary() &&
                         state === s_inner_probe

  def innerProbe(prb: Probe, next: UInt) {
    if (needs_probes) {
      val irel_counter = Wire(new TwoWayBeatCounterStatus)

      pending_iprbs := pending_iprbs & dropPendingBitAtDest(io.inner.probe)
      io.inner.probe.valid := state === s_inner_probe && pending_iprbs.orR
      io.inner.probe.bits := prb

      // Count probes out vs. (non-voluntary) releases back in.
      connectTwoWayBeatCounters(
        status = irel_counter,
        up = io.inner.probe,
        down = io.inner.release,
        max = innerNCachingClients,
        trackDown = (r: Release) => (state =/= s_idle) && !r.isVoluntary())

      when(state === s_inner_probe && !(pending_iprbs.orR || irel_counter.pending)) {
        state := next
      }
    } else {
      when (state === s_inner_probe) { state := next }
    }

    //N.B. no pending bits added to scoreboard because all handled in s_inner_probe
  }
}
|
||||
|
||||
trait RoutesInParent extends HasBlockAddressBuffer
|
||||
with HasXactTrackerStates {
|
||||
def io: HierarchicalXactTrackerIO
|
||||
type AddrComparison = HasCacheBlockAddress => Bool
|
||||
def exactAddrMatch(a: HasCacheBlockAddress): Bool = a.conflicts(xact_addr_block)
|
||||
def routeInParent(iacqMatches: AddrComparison = exactAddrMatch,
|
||||
irelMatches: AddrComparison = exactAddrMatch,
|
||||
oprbMatches: AddrComparison = exactAddrMatch,
|
||||
iacqCanAlloc: Bool = Bool(false),
|
||||
irelCanAlloc: Bool = Bool(false),
|
||||
oprbCanAlloc: Bool = Bool(false)) {
|
||||
io.alloc.iacq.matches := (state =/= s_idle) && iacqMatches(io.iacq())
|
||||
io.alloc.irel.matches := (state =/= s_idle) && irelMatches(io.irel())
|
||||
io.alloc.oprb.matches := (state =/= s_idle) && oprbMatches(io.oprb())
|
||||
io.alloc.iacq.can := state === s_idle && iacqCanAlloc
|
||||
io.alloc.irel.can := state === s_idle && irelCanAlloc
|
||||
io.alloc.oprb.can := state === s_idle && oprbCanAlloc
|
||||
io.alloc.addr_block := xact_addr_block
|
||||
io.alloc.idle := state === s_idle
|
||||
}
|
||||
}
|
||||
|
||||
trait AcceptsInnerAcquires extends HasAcquireMetadataBuffer
|
||||
with AcceptsVoluntaryReleases
|
||||
with HasXactTrackerStates
|
||||
with HasPendingBitHelpers {
|
||||
def io: HierarchicalXactTrackerIO
|
||||
def nSecondaryMisses: Int
|
||||
def alwaysWriteFullBeat: Boolean
|
||||
def inner_coh: ManagerMetadata
|
||||
def trackerId: Int
|
||||
|
||||
// Secondary miss queue holds transaction metadata used to make grants
|
||||
lazy val ignt_q = Module(new Queue(
|
||||
new SecondaryMissInfo()(p.alterPartial({ case TLId => p(InnerTLId) })),
|
||||
1 + nSecondaryMisses))
|
||||
|
||||
val pending_ignt = Wire(Bool())
|
||||
val ignt_data_idx = Wire(UInt())
|
||||
val ignt_data_done = Wire(Bool())
|
||||
val ifin_counter = Wire(new TwoWayBeatCounterStatus)
|
||||
val pending_put_data = Reg(init=Bits(0, width = innerDataBeats))
|
||||
val pending_ignt_data = Reg(init=Bits(0, width = innerDataBeats))
|
||||
|
||||
def iacq_same_xact: Bool =
|
||||
(xact_iacq.client_xact_id === io.iacq().client_xact_id) &&
|
||||
(xact_iacq.client_id === io.iacq().client_id) &&
|
||||
pending_ignt
|
||||
def iacq_same_xact_multibeat = iacq_same_xact && io.iacq().hasMultibeatData()
|
||||
def iacq_can_merge: Bool
|
||||
def iacq_is_allocating: Bool = state === s_idle && io.alloc.iacq.should && io.inner.acquire.valid
|
||||
def iacq_is_merging: Bool = (iacq_can_merge || iacq_same_xact) && io.inner.acquire.valid
|
||||
|
||||
def innerAcquire(can_alloc: Bool, next: UInt) {
|
||||
val iacq_matches_head = iacq_same_xact && xact_iacq.addr_beat === io.iacq().addr_beat
|
||||
|
||||
// Enqueue some metadata information that we'll use to make coherence updates with later
|
||||
ignt_q.io.enq.valid := iacq_is_allocating ||
|
||||
(!iacq_matches_head && pending_ignt &&
|
||||
io.inner.acquire.fire() && io.iacq().first())
|
||||
ignt_q.io.enq.bits := io.iacq()
|
||||
|
||||
// Use the outputs of the queue to make further messages
|
||||
xact_iacq := Mux(ignt_q.io.deq.valid, ignt_q.io.deq.bits, ignt_q.io.enq.bits)
|
||||
xact_addr_beat := xact_iacq.addr_beat
|
||||
pending_ignt := ignt_q.io.count > UInt(0)
|
||||
|
||||
// Track whether any beats are missing from a PutBlock
|
||||
when (state =/= s_idle || io.alloc.iacq.should) {
|
||||
pending_put_data := (pending_put_data &
|
||||
dropPendingBitWhenBeatHasData(io.inner.acquire)) |
|
||||
addPendingBitsOnFirstBeat(io.inner.acquire)
|
||||
}
|
||||
|
||||
// Intialize transaction metadata for accepted Acquire
|
||||
when(iacq_is_allocating) {
|
||||
xact_addr_block := io.iacq().addr_block
|
||||
xact_allocate := io.iacq().allocate() && can_alloc
|
||||
xact_amo_shift_bytes := io.iacq().amo_shift_bytes()
|
||||
xact_op_code := io.iacq().op_code()
|
||||
xact_addr_byte := io.iacq().addr_byte()
|
||||
xact_op_size := io.iacq().op_size()
|
||||
// Make sure to collect all data from a PutBlock
|
||||
pending_put_data := Mux(
|
||||
io.iacq().isBuiltInType(Acquire.putBlockType),
|
||||
dropPendingBitWhenBeatHasData(io.inner.acquire),
|
||||
UInt(0))
|
||||
pending_ignt_data := UInt(0)
|
||||
state := next
|
||||
}
|
||||
|
||||
scoreboard += (pending_put_data.orR)
|
||||
}
|
||||
|
||||
def innerGrant(
|
||||
data: UInt = io.ognt().data,
|
||||
external_pending: Bool = Bool(false),
|
||||
buffering: Bool = Bool(true),
|
||||
add_pending_bits: UInt = UInt(0)) {
|
||||
// Track the number of outstanding inner.finishes
|
||||
connectTwoWayBeatCounters(
|
||||
status = ifin_counter,
|
||||
up = io.inner.grant,
|
||||
down = io.inner.finish,
|
||||
max = nSecondaryMisses,
|
||||
trackUp = (g: Grant) => g.requiresAck())
|
||||
|
||||
// Track which beats are ready for response
|
||||
when(!iacq_is_allocating) {
|
||||
pending_ignt_data := pending_ignt_data |
|
||||
addPendingBitWhenBeatHasData(io.inner.release) |
|
||||
addPendingBitWhenBeatHasData(io.outer.grant) |
|
||||
add_pending_bits
|
||||
}
|
||||
|
||||
if (p(EnableL2Logging)) {
|
||||
when (io.inner.grant.fire() && io.ignt().hasData()) {
|
||||
printf("[get] addr_block=%x addr_beat=%d data=%x\n",
|
||||
xact_addr_block, io.ignt().addr_beat, io.ignt().data)
|
||||
}
|
||||
}
|
||||
|
||||
// Have we finished receiving the complete inner acquire transaction?
|
||||
val iacq_finished = !(state === s_idle ||
|
||||
state === s_meta_read ||
|
||||
pending_put_data.orR)
|
||||
|
||||
val ignt_from_iacq = inner_coh.makeGrant(
|
||||
sec = ignt_q.io.deq.bits,
|
||||
manager_xact_id = UInt(trackerId),
|
||||
data = data)
|
||||
|
||||
// Make the Grant message using the data stored in the secondary miss queue
|
||||
val (cnt, done) = connectOutgoingDataBeatCounter(io.inner.grant, ignt_q.io.deq.bits.addr_beat)
|
||||
ignt_data_idx := cnt
|
||||
ignt_data_done := done
|
||||
ignt_q.io.deq.ready := Bool(false)
|
||||
when(!vol_ignt_counter.pending) {
|
||||
ignt_q.io.deq.ready := ignt_data_done
|
||||
io.inner.grant.bits := ignt_from_iacq
|
||||
io.inner.grant.bits.addr_beat := ignt_data_idx // override based on outgoing counter
|
||||
when (state === s_busy && pending_ignt) {
|
||||
io.inner.grant.valid := !external_pending &&
|
||||
Mux(io.ignt().hasData(),
|
||||
Mux(buffering,
|
||||
pending_ignt_data(ignt_data_idx),
|
||||
io.outer.grant.valid),
|
||||
iacq_finished)
|
||||
}
|
||||
}
|
||||
|
||||
// We must wait for as many Finishes as we sent Grants
|
||||
io.inner.finish.ready := state === s_busy
|
||||
|
||||
scoreboard += (pending_ignt, ifin_counter.pending)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
trait EmitsOuterAcquires extends AcceptsInnerAcquires {
|
||||
val ognt_counter = Wire(new TwoWayBeatCounterStatus)
|
||||
|
||||
// Handle misses or coherence permission upgrades by initiating a new transaction in the outer memory:
|
||||
//
|
||||
// If we're allocating in this cache, we can use the current metadata
|
||||
// to make an appropriate custom Acquire, otherwise we copy over the
|
||||
// built-in Acquire from the inner TL to the outer TL
|
||||
def outerAcquire(
|
||||
caching: Bool,
|
||||
coh: ClientMetadata,
|
||||
block_outer_acquire: Bool = Bool(false),
|
||||
buffering: Bool = Bool(true),
|
||||
data: UInt = io.iacq().data,
|
||||
wmask: UInt = io.iacq().wmask(),
|
||||
next: UInt = s_busy) {
|
||||
|
||||
// Tracks outstanding Acquires, waiting for their matching Grant.
|
||||
connectTwoWayBeatCounters(
|
||||
status = ognt_counter,
|
||||
up = io.outer.acquire,
|
||||
down = io.outer.grant,
|
||||
beat = xact_addr_beat,
|
||||
trackDown = (g: Grant) => !g.isVoluntary())
|
||||
|
||||
io.outer.acquire.valid :=
|
||||
state === s_outer_acquire && !block_outer_acquire &&
|
||||
(xact_allocate ||
|
||||
Mux(buffering,
|
||||
!pending_put_data(ognt_counter.up.idx),
|
||||
// If not buffering, we should only send an outer acquire if
|
||||
// the ignt_q is not empty (pending_ignt) and the enqueued
|
||||
// transaction does not have data or we are receiving the
|
||||
// inner acquire and it is the same transaction as the one enqueued.
|
||||
pending_ignt && (!xact_iacq.hasData() ||
|
||||
(io.inner.acquire.valid && iacq_same_xact))))
|
||||
|
||||
io.outer.acquire.bits :=
|
||||
Mux(caching,
|
||||
coh.makeAcquire(
|
||||
op_code = xact_op_code,
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = xact_addr_block),
|
||||
BuiltInAcquireBuilder(
|
||||
a_type = xact_iacq.a_type,
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = xact_addr_block,
|
||||
addr_beat = ognt_counter.up.idx,
|
||||
data = data,
|
||||
addr_byte = xact_addr_byte,
|
||||
operand_size = xact_op_size,
|
||||
opcode = xact_op_code,
|
||||
wmask = wmask,
|
||||
alloc = Bool(false))
|
||||
(p.alterPartial({ case TLId => p(OuterTLId)})))
|
||||
|
||||
when(state === s_outer_acquire && ognt_counter.up.done) { state := next }
|
||||
|
||||
when (ognt_counter.pending) { io.outer.grant.ready := Bool(true) }
|
||||
|
||||
scoreboard += ognt_counter.pending
|
||||
}
|
||||
}
|
||||
|
||||
abstract class VoluntaryReleaseTracker(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p)
|
||||
with AcceptsVoluntaryReleases
|
||||
with RoutesInParent {
|
||||
def irel_can_merge = Bool(false)
|
||||
def irel_same_xact = io.irel().conflicts(xact_addr_block) &&
|
||||
io.irel().isVoluntary() &&
|
||||
pending_irel_data.orR
|
||||
}
|
||||
|
||||
abstract class AcquireTracker(val trackerId: Int)(implicit p: Parameters) extends XactTracker()(p)
|
||||
with AcceptsInnerAcquires
|
||||
with EmitsOuterAcquires
|
||||
with EmitsInnerProbes
|
||||
with RoutesInParent {
|
||||
}
|
||||
43
src/main/scala/uncore/coherence/Directory.scala
Normal file
43
src/main/scala/uncore/coherence/Directory.scala
Normal file
@@ -0,0 +1,43 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.coherence
|
||||
import Chisel._
|
||||
|
||||
// This class encapsulates transformations on different directory information
|
||||
// storage formats
|
||||
abstract class DirectoryRepresentation(val width: Int) {
|
||||
def pop(prev: UInt, id: UInt): UInt
|
||||
def push(prev: UInt, id: UInt): UInt
|
||||
def flush: UInt
|
||||
def none(s: UInt): Bool
|
||||
def one(s: UInt): Bool
|
||||
def count(s: UInt): UInt
|
||||
def next(s: UInt): UInt
|
||||
def full(s: UInt): UInt
|
||||
}
|
||||
|
||||
abstract trait HasDirectoryRepresentation {
|
||||
val dir: DirectoryRepresentation
|
||||
}
|
||||
|
||||
class NullRepresentation(nClients: Int) extends DirectoryRepresentation(1) {
|
||||
def pop(prev: UInt, id: UInt) = UInt(0)
|
||||
def push(prev: UInt, id: UInt) = UInt(0)
|
||||
def flush = UInt(0)
|
||||
def none(s: UInt) = Bool(false)
|
||||
def one(s: UInt) = Bool(false)
|
||||
def count(s: UInt) = UInt(nClients)
|
||||
def next(s: UInt) = UInt(0)
|
||||
def full(s: UInt) = SInt(-1, width = nClients).asUInt
|
||||
}
|
||||
|
||||
class FullRepresentation(nClients: Int) extends DirectoryRepresentation(nClients) {
|
||||
def pop(prev: UInt, id: UInt) = prev & ~UIntToOH(id)
|
||||
def push(prev: UInt, id: UInt) = prev | UIntToOH(id)
|
||||
def flush = UInt(0, width = width)
|
||||
def none(s: UInt) = s === UInt(0)
|
||||
def one(s: UInt) = PopCount(s) === UInt(1)
|
||||
def count(s: UInt) = PopCount(s)
|
||||
def next(s: UInt) = PriorityEncoder(s)
|
||||
def full(s: UInt) = s
|
||||
}
|
||||
344
src/main/scala/uncore/coherence/Metadata.scala
Normal file
344
src/main/scala/uncore/coherence/Metadata.scala
Normal file
@@ -0,0 +1,344 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.coherence
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
/** Identifies the TLId of the inner network in a hierarchical cache controller */
|
||||
case object InnerTLId extends Field[String]
|
||||
/** Identifies the TLId of the outer network in a hierarchical cache controller */
|
||||
case object OuterTLId extends Field[String]
|
||||
|
||||
/** Base class to represent coherence information in clients and managers */
|
||||
abstract class CoherenceMetadata(implicit p: Parameters) extends TLBundle()(p) {
|
||||
val co = tlCoh
|
||||
}
|
||||
|
||||
/** Stores the client-side coherence information,
|
||||
* such as permissions on the data and whether the data is dirty.
|
||||
* Its API can be used to make TileLink messages in response to
|
||||
* memory operations or [[uncore.Probe]] messages.
|
||||
*/
|
||||
class ClientMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) {
|
||||
/** Actual state information stored in this bundle */
|
||||
val state = UInt(width = co.clientStateWidth)
|
||||
|
||||
/** Metadata equality */
|
||||
def ===(rhs: ClientMetadata): Bool = this.state === rhs.state
|
||||
def =/=(rhs: ClientMetadata): Bool = !this.===(rhs)
|
||||
|
||||
/** Is the block's data present in this cache */
|
||||
def isValid(dummy: Int = 0): Bool = co.isValid(this)
|
||||
/** Does this cache have permissions on this block sufficient to perform op */
|
||||
def isHit(op_code: UInt): Bool = co.isHit(op_code, this)
|
||||
/** Does this cache lack permissions on this block sufficient to perform op */
|
||||
def isMiss(op_code: UInt): Bool = !co.isHit(op_code, this)
|
||||
/** Does a secondary miss on the block require another Acquire message */
|
||||
def requiresAcquireOnSecondaryMiss(first_op: UInt, second_op: UInt): Bool =
|
||||
co.requiresAcquireOnSecondaryMiss(first_op, second_op, this)
|
||||
/** Does op require a Release to be made to outer memory */
|
||||
def requiresReleaseOnCacheControl(op_code: UInt): Bool =
|
||||
co.requiresReleaseOnCacheControl(op_code: UInt, this)
|
||||
/** Does an eviction require a Release to be made to outer memory */
|
||||
def requiresVoluntaryWriteback(dummy: Int = 0): Bool =
|
||||
co.requiresReleaseOnCacheControl(M_FLUSH, this)
|
||||
|
||||
/** Constructs an Acquire message based on this metdata and a memory operation
|
||||
*
|
||||
* @param client_xact_id client's transaction id
|
||||
* @param addr_block address of the cache block
|
||||
* @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
|
||||
*/
|
||||
def makeAcquire(
|
||||
op_code: UInt,
|
||||
client_xact_id: UInt,
|
||||
addr_block: UInt): Acquire = {
|
||||
Acquire(
|
||||
is_builtin_type = Bool(false),
|
||||
a_type = co.getAcquireType(op_code, this),
|
||||
client_xact_id = client_xact_id,
|
||||
addr_block = addr_block,
|
||||
union = Cat(op_code, Bool(true)))(p)
|
||||
}
|
||||
|
||||
/** Constructs a Release message based on this metadata on cache control op
|
||||
*
|
||||
* @param client_xact_id client's transaction id
|
||||
* @param addr_block address of the cache block
|
||||
* @param addr_beat sub-block address (which beat)
|
||||
* @param data data being written back
|
||||
*/
|
||||
def makeVoluntaryRelease(
|
||||
op_code: UInt,
|
||||
client_xact_id: UInt,
|
||||
addr_block: UInt,
|
||||
addr_beat: UInt = UInt(0),
|
||||
data: UInt = UInt(0)): Release =
|
||||
Release(
|
||||
voluntary = Bool(true),
|
||||
r_type = co.getReleaseType(op_code, this),
|
||||
client_xact_id = client_xact_id,
|
||||
addr_block = addr_block,
|
||||
addr_beat = addr_beat,
|
||||
data = data)(p)
|
||||
|
||||
/** Constructs a Release message based on this metadata on an eviction
|
||||
*
|
||||
* @param client_xact_id client's transaction id
|
||||
* @param addr_block address of the cache block
|
||||
* @param addr_beat sub-block address (which beat)
|
||||
* @param data data being written back
|
||||
*/
|
||||
def makeVoluntaryWriteback(
|
||||
client_xact_id: UInt,
|
||||
addr_block: UInt,
|
||||
addr_beat: UInt = UInt(0),
|
||||
data: UInt = UInt(0)): Release =
|
||||
makeVoluntaryRelease(
|
||||
op_code = M_FLUSH,
|
||||
client_xact_id = client_xact_id,
|
||||
addr_block = addr_block,
|
||||
addr_beat = addr_beat,
|
||||
data = data)
|
||||
|
||||
/** Constructs a Release message based on this metadata and a [[uncore.Probe]]
|
||||
*
|
||||
* @param the incoming [[uncore.Probe]]
|
||||
* @param addr_beat sub-block address (which beat)
|
||||
* @param data data being released
|
||||
*/
|
||||
def makeRelease(
|
||||
prb: Probe,
|
||||
addr_beat: UInt = UInt(0),
|
||||
data: UInt = UInt(0)): Release =
|
||||
Release(
|
||||
voluntary = Bool(false),
|
||||
r_type = co.getReleaseType(prb, this),
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = prb.addr_block,
|
||||
addr_beat = addr_beat,
|
||||
data = data)(p)
|
||||
|
||||
/** New metadata after receiving a [[uncore.Grant]]
|
||||
*
|
||||
* @param incoming the incoming [[uncore.Grant]]
|
||||
* @param pending the mem op that triggered this transaction
|
||||
*/
|
||||
def onGrant(incoming: Grant, pending: UInt): ClientMetadata =
|
||||
co.clientMetadataOnGrant(incoming, pending, this)
|
||||
|
||||
/** New metadata after receiving a [[uncore.Probe]]
|
||||
*
|
||||
* @param incoming the incoming [[uncore.Probe]]
|
||||
*/
|
||||
def onProbe(incoming: Probe): ClientMetadata =
|
||||
co.clientMetadataOnProbe(incoming, this)
|
||||
|
||||
/** New metadata after a op_code hits this block
|
||||
*
|
||||
* @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
|
||||
*/
|
||||
def onHit(op_code: UInt): ClientMetadata =
|
||||
co.clientMetadataOnHit(op_code, this)
|
||||
|
||||
/** New metadata after op_code releases permissions on this block
|
||||
*
|
||||
* @param op_code a memory operation from [[uncore.constants.MemoryOpConstants]]
|
||||
*/
|
||||
def onCacheControl(op_code: UInt): ClientMetadata =
|
||||
co.clientMetadataOnCacheControl(op_code, this)
|
||||
}
|
||||
|
||||
/** Factories for ClientMetadata, including on reset */
|
||||
object ClientMetadata {
|
||||
def apply(state: UInt)(implicit p: Parameters) = {
|
||||
val meta = Wire(new ClientMetadata)
|
||||
meta.state := state
|
||||
meta
|
||||
}
|
||||
def onReset(implicit p: Parameters) = ClientMetadata(UInt(0))(p) // TODO: assumes clientInvalid === 0
|
||||
}
|
||||
|
||||
/** Stores manager-side information about the status
|
||||
* of a cache block, including whether it has any known sharers.
|
||||
*
|
||||
* Its API can be used to create [[uncore.Probe]] and [[uncore.Grant]] messages.
|
||||
*/
|
||||
class ManagerMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) {
|
||||
// Currently no coherence policies assume manager-side state information
|
||||
// val state = UInt(width = co.masterStateWidth) TODO: Fix 0-width wires in Chisel
|
||||
|
||||
/** The directory information for this block */
|
||||
val sharers = UInt(width = co.dir.width)
|
||||
|
||||
/** Metadata equality */
|
||||
def ===(rhs: ManagerMetadata): Bool = //this.state === rhs.state && TODO: Fix 0-width wires in Chisel
|
||||
this.sharers === rhs.sharers
|
||||
def =/=(rhs: ManagerMetadata): Bool = !this.===(rhs)
|
||||
|
||||
/** Converts the directory info into an N-hot sharer bitvector (i.e. full representation) */
|
||||
def full(dummy: Int = 0): UInt = co.dir.full(this.sharers)
|
||||
|
||||
/** Does this [[uncore.Acquire]] require [[uncore.Probe Probes]] to be sent */
|
||||
def requiresProbes(acq: HasAcquireType): Bool = co.requiresProbes(acq, this)
|
||||
/** Does this memory op require [[uncore.Probe Probes]] to be sent */
|
||||
def requiresProbes(op_code: UInt): Bool = co.requiresProbes(op_code, this)
|
||||
/** Does an eviction require [[uncore.Probe Probes]] to be sent */
|
||||
def requiresProbesOnVoluntaryWriteback(dummy: Int = 0): Bool =
|
||||
co.requiresProbes(M_FLUSH, this)
|
||||
|
||||
/** Construct an appropriate [[uncore.ProbeToDst]] for a given [[uncore.Acquire]]
|
||||
*
|
||||
* @param dst Destination client id for this Probe
|
||||
* @param acq Acquire message triggering this Probe
|
||||
* @param addr_block address of the cache block being probed
|
||||
*/
|
||||
def makeProbe(dst: UInt, acq: HasAcquireType, addr_block: UInt): ProbeToDst =
|
||||
Probe(dst, co.getProbeType(acq, this), addr_block)(p)
|
||||
|
||||
/** Construct an appropriate [[uncore.ProbeToDst]] for a given [[uncore.Acquire]]
|
||||
*
|
||||
* @param dst Destination client id for this Probe
|
||||
* @param acq Acquire message triggering this Probe
|
||||
*/
|
||||
def makeProbe(dst: UInt, acq: AcquireMetadata): ProbeToDst =
|
||||
Probe(dst, co.getProbeType(acq, this), acq.addr_block)(p)
|
||||
|
||||
/** Construct an appropriate [[uncore.ProbeToDst]] for a given mem op
|
||||
*
|
||||
* @param dst Destination client id for this Probe
|
||||
* @param op_code memory operation triggering this Probe
|
||||
* @param addr_block address of the cache block being probed
|
||||
*/
|
||||
def makeProbe(dst: UInt, op_code: UInt, addr_block: UInt): ProbeToDst =
|
||||
Probe(dst, co.getProbeType(op_code, this), addr_block)(p)
|
||||
|
||||
/** Construct an appropriate [[uncore.ProbeToDst]] for an eviction
|
||||
*
|
||||
* @param dst Destination client id for this Probe
|
||||
* @param addr_block address of the cache block being probed prior to eviction
|
||||
*/
|
||||
def makeProbeForVoluntaryWriteback(dst: UInt, addr_block: UInt): ProbeToDst =
|
||||
makeProbe(dst, M_FLUSH, addr_block)
|
||||
|
||||
/** Construct an appropriate [[uncore.GrantToDst]] to acknowledge an [[uncore.Release]]
|
||||
*
|
||||
* @param rel Release message being acknowledged by this Grant
|
||||
*/
|
||||
def makeGrant(rel: ReleaseMetadata with HasClientId): GrantToDst =
|
||||
Grant(
|
||||
dst = rel.client_id,
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = Grant.voluntaryAckType,
|
||||
client_xact_id = rel.client_xact_id,
|
||||
manager_xact_id = UInt(0))(p)
|
||||
|
||||
/** Construct an appropriate [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]]
|
||||
*
|
||||
* May contain single or multiple beats of data, or just be a permissions upgrade.
|
||||
*
|
||||
* @param acq Acquire message being responded to by this Grant
|
||||
* @param manager_xact_id manager's transaction id
|
||||
* @param addr_beat beat id of the data
|
||||
* @param data data being refilled to the original requestor
|
||||
*/
|
||||
def makeGrant(
|
||||
acq: AcquireMetadata with HasClientId,
|
||||
manager_xact_id: UInt,
|
||||
addr_beat: UInt = UInt(0),
|
||||
data: UInt = UInt(0)): GrantToDst =
|
||||
Grant(
|
||||
dst = acq.client_id,
|
||||
is_builtin_type = acq.isBuiltInType(),
|
||||
g_type = co.getGrantType(acq, this),
|
||||
client_xact_id = acq.client_xact_id,
|
||||
manager_xact_id = manager_xact_id,
|
||||
addr_beat = addr_beat,
|
||||
data = data)(p)
|
||||
|
||||
/** Construct an [[uncore.GrantToDst]] to respond to an [[uncore.Acquire]] with some overrides
|
||||
*
|
||||
* Used to respond to secondary misses merged into this transaction.
|
||||
* May contain single or multiple beats of data.
|
||||
*
|
||||
* @param sec Secondary miss info
|
||||
* @param manager_xact_id manager's transaction id
|
||||
* @param data data being refilled to the original requestor
|
||||
*/
|
||||
def makeGrant(
|
||||
sec: SecondaryMissInfo,
|
||||
manager_xact_id: UInt,
|
||||
data: UInt): GrantToDst = {
|
||||
Grant(
|
||||
dst = sec.client_id,
|
||||
is_builtin_type = sec.isBuiltInType(),
|
||||
g_type = co.getGrantType(sec, this),
|
||||
client_xact_id = sec.client_xact_id,
|
||||
manager_xact_id = manager_xact_id,
|
||||
addr_beat = sec.addr_beat,
|
||||
data = data)(p)
|
||||
}
|
||||
|
||||
/** New metadata after receiving a [[uncore.ReleaseFromSrc]]
|
||||
*
|
||||
* @param incoming the incoming [[uncore.ReleaseFromSrc]]
|
||||
*/
|
||||
def onRelease(incoming: ReleaseMetadata with HasClientId): ManagerMetadata =
|
||||
co.managerMetadataOnRelease(incoming, incoming.client_id, this)
|
||||
|
||||
/** New metadata after sending a [[uncore.GrantToDst]]
|
||||
*
|
||||
* @param outgoing the outgoing [[uncore.GrantToDst]]
|
||||
*/
|
||||
def onGrant(outgoing: GrantMetadata with HasClientId): ManagerMetadata =
|
||||
co.managerMetadataOnGrant(outgoing, outgoing.client_id, this)
|
||||
}
|
||||
|
||||
/** Factories for ManagerMetadata, including on reset */
|
||||
object ManagerMetadata {
|
||||
def apply(sharers: UInt, state: UInt = UInt(width = 0))(implicit p: Parameters) = {
|
||||
val meta = Wire(new ManagerMetadata)
|
||||
//meta.state := state TODO: Fix 0-width wires in Chisel
|
||||
meta.sharers := sharers
|
||||
meta
|
||||
}
|
||||
def apply(implicit p: Parameters) = {
|
||||
val meta = Wire(new ManagerMetadata)
|
||||
//meta.state := UInt(width = 0) TODO: Fix 0-width wires in Chisel
|
||||
meta.sharers := meta.co.dir.flush
|
||||
meta
|
||||
}
|
||||
def onReset(implicit p: Parameters) = ManagerMetadata(p)
|
||||
}
|
||||
|
||||
/** HierarchicalMetadata is used in a cache in a multi-level memory hierarchy
|
||||
* that is a manager with respect to some inner caches and a client with
|
||||
* respect to some outer cache.
|
||||
*
|
||||
* This class makes use of two different sets of TileLink parameters, which are
|
||||
* applied by contextually mapping [[uncore.TLId]] to one of
|
||||
* [[uncore.InnerTLId]] or [[uncore.OuterTLId]].
|
||||
*/
|
||||
class HierarchicalMetadata(implicit p: Parameters) extends CoherenceMetadata()(p) {
|
||||
val inner: ManagerMetadata = new ManagerMetadata()(p.alterPartial({case TLId => p(InnerTLId)}))
|
||||
val outer: ClientMetadata = new ClientMetadata()(p.alterPartial({case TLId => p(OuterTLId)}))
|
||||
def ===(rhs: HierarchicalMetadata): Bool =
|
||||
this.inner === rhs.inner && this.outer === rhs.outer
|
||||
def =/=(rhs: HierarchicalMetadata): Bool = !this.===(rhs)
|
||||
}
|
||||
|
||||
/** Factories for HierarchicalMetadata, including on reset */
|
||||
object HierarchicalMetadata {
|
||||
def apply(inner: ManagerMetadata, outer: ClientMetadata)
|
||||
(implicit p: Parameters): HierarchicalMetadata = {
|
||||
val m = Wire(new HierarchicalMetadata)
|
||||
m.inner := inner
|
||||
m.outer := outer
|
||||
m
|
||||
}
|
||||
def onReset(implicit p: Parameters): HierarchicalMetadata =
|
||||
apply(ManagerMetadata.onReset, ClientMetadata.onReset)
|
||||
}
|
||||
696
src/main/scala/uncore/coherence/Policies.scala
Normal file
696
src/main/scala/uncore/coherence/Policies.scala
Normal file
@@ -0,0 +1,696 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.coherence
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import uncore.util._
|
||||
|
||||
/** The entire CoherencePolicy API consists of the following three traits:
|
||||
* HasCustomTileLinkMessageTypes, used to define custom messages
|
||||
* HasClientSideCoherencePolicy, for client coherence agents
|
||||
* HasManagerSideCoherencePolicy, for manager coherence agents
|
||||
*/
|
||||
abstract class CoherencePolicy(val dir: DirectoryRepresentation)
|
||||
extends HasCustomTileLinkMessageTypes
|
||||
with HasClientSideCoherencePolicy
|
||||
with HasManagerSideCoherencePolicy
|
||||
|
||||
/** This API defines the custom, coherence-policy-defined message types,
|
||||
* as opposed to the built-in ones found in tilelink.scala.
|
||||
* Policies must enumerate the custom messages to be sent over each
|
||||
* channel, as well as which of them have associated data.
|
||||
*/
|
||||
trait HasCustomTileLinkMessageTypes {
|
||||
val nAcquireTypes: Int
|
||||
def acquireTypeWidth = log2Up(nAcquireTypes)
|
||||
val nProbeTypes: Int
|
||||
def probeTypeWidth = log2Up(nProbeTypes)
|
||||
val nReleaseTypes: Int
|
||||
def releaseTypeWidth = log2Up(nReleaseTypes)
|
||||
val nGrantTypes: Int
|
||||
def grantTypeWidth = log2Up(nGrantTypes)
|
||||
|
||||
val acquireTypesWithData = Nil // Only built-in Acquire types have data for now
|
||||
def releaseTypesWithData: Seq[UInt]
|
||||
def grantTypesWithData: Seq[UInt]
|
||||
}
|
||||
|
||||
/** This API contains all functions required for client coherence agents.
|
||||
* Policies must enumerate the number of client states and define their
|
||||
* permissions with respect to memory operations. Policies must fill in functions
|
||||
* to control which messages are sent and how metadata is updated in response
|
||||
* to coherence events. These funtions are generally called from within the
|
||||
* ClientMetadata class in metadata.scala
|
||||
*/
|
||||
trait HasClientSideCoherencePolicy {
|
||||
// Client coherence states and their permissions
|
||||
val nClientStates: Int
|
||||
def clientStateWidth = log2Ceil(nClientStates)
|
||||
def clientStatesWithReadPermission: Seq[UInt]
|
||||
def clientStatesWithWritePermission: Seq[UInt]
|
||||
def clientStatesWithDirtyData: Seq[UInt]
|
||||
|
||||
// Transaction initiation logic
|
||||
def isValid(meta: ClientMetadata): Bool
|
||||
def isHit(cmd: UInt, meta: ClientMetadata): Bool = {
|
||||
Mux(isWriteIntent(cmd),
|
||||
meta.state isOneOf clientStatesWithWritePermission,
|
||||
meta.state isOneOf clientStatesWithReadPermission)
|
||||
}
|
||||
//TODO: Assumes all states with write permissions also have read permissions
|
||||
def requiresAcquireOnSecondaryMiss(
|
||||
first_cmd: UInt,
|
||||
second_cmd: UInt,
|
||||
meta: ClientMetadata): Bool = {
|
||||
isWriteIntent(second_cmd) && !isWriteIntent(first_cmd)
|
||||
}
|
||||
//TODO: Assumes all cache ctrl ops writeback dirty data, and
|
||||
// doesn't issue transaction when e.g. downgrading Exclusive to Shared:
|
||||
def requiresReleaseOnCacheControl(cmd: UInt, meta: ClientMetadata): Bool =
|
||||
meta.state isOneOf clientStatesWithDirtyData
|
||||
|
||||
// Determine which custom message type to use
|
||||
def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt
|
||||
def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt
|
||||
def getReleaseType(p: HasProbeType, meta: ClientMetadata): UInt
|
||||
|
||||
// Mutate ClientMetadata based on messages or cmds
|
||||
def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata): ClientMetadata
|
||||
def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata): ClientMetadata
|
||||
def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata): ClientMetadata
|
||||
def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata): ClientMetadata
|
||||
}
|
||||
|
||||
/** This API contains all functions required for manager coherence agents.
  * Policies must enumerate the number of manager states. Policies must fill
  * in functions to control which Probe and Grant messages are sent and how
  * metadata should be updated in response to coherence events. These functions
  * are generally called from within the ManagerMetadata class in metadata.scala
  */
trait HasManagerSideCoherencePolicy extends HasDirectoryRepresentation {
  // Number of distinct manager-side coherence states (may be 0 when the
  // directory's sharer list alone is sufficient state for the protocol).
  val nManagerStates: Int
  // Bits needed to encode a manager state.
  def masterStateWidth = log2Ceil(nManagerStates)

  // Transaction probing logic:
  // whether an incoming Acquire / cache-control cmd must probe current sharers.
  def requiresProbes(acq: HasAcquireType, meta: ManagerMetadata): Bool
  def requiresProbes(cmd: UInt, meta: ManagerMetadata): Bool

  // Determine which custom message type to use in response
  def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt
  def getProbeType(acq: HasAcquireType, meta: ManagerMetadata): UInt
  def getGrantType(acq: HasAcquireType, meta: ManagerMetadata): UInt
  def getExclusiveGrantType(): UInt

  // Mutate ManagerMetadata based on messages or cmds
  def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata): ManagerMetadata
  // Default Grant behavior: add dst to the sharer list unless the Grant is a
  // built-in (uncached) type, in which case the sharers are left unchanged.
  def managerMetadataOnGrant(outgoing: HasGrantType, dst: UInt, meta: ManagerMetadata) =
    ManagerMetadata(sharers=Mux(outgoing.isBuiltInType(), // Assumes all built-ins are uncached
                    meta.sharers,
                    dir.push(meta.sharers, dst)))(meta.p)
                    //state = meta.state) TODO: Fix 0-width wires in Chisel
}
|
||||
|
||||
/** The following concrete implementations of CoherencePolicy each provide the
  * functionality of one particular protocol.
  */

/** A simple protocol with only two Client states.
  * Data is always assumed to be dirty.
  * Only a single client may ever have a copy of a block at a time.
  */
class MICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
  // Message types
  val nAcquireTypes = 1
  val nProbeTypes = 2
  val nReleaseTypes = 4
  val nGrantTypes = 1

  val acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes)
  val probeInvalidate :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
  val releaseInvalidateData :: releaseCopyData :: releaseInvalidateAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
  val grantExclusive :: Nil = Enum(UInt(), nGrantTypes)

  def releaseTypesWithData = Seq(releaseInvalidateData, releaseCopyData)
  def grantTypesWithData = Seq(grantExclusive)

  // Client states and functions
  val nClientStates = 2
  val clientInvalid :: clientValid :: Nil = Enum(UInt(), nClientStates)

  // MI: the single valid state grants both read and write permission,
  // and valid blocks are always treated as dirty.
  def clientStatesWithReadPermission = Seq(clientValid)
  def clientStatesWithWritePermission = Seq(clientValid)
  def clientStatesWithDirtyData = Seq(clientValid)

  def isValid (meta: ClientMetadata): Bool = meta.state =/= clientInvalid

  // Only one Acquire type exists, so every miss requests exclusive access.
  def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt = acquireExclusive

  // Voluntary release type for a cache-control cmd; data is attached iff dirty.
  def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
    val dirty = meta.state isOneOf clientStatesWithDirtyData
    MuxLookup(cmd, releaseCopyAck, Array(
      M_FLUSH   -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
      M_PRODUCE -> Mux(dirty, releaseCopyData, releaseCopyAck),
      M_CLEAN   -> Mux(dirty, releaseCopyData, releaseCopyAck)))
  }

  // Release type in response to a Probe: both probe types are handled as a
  // full flush in this protocol.
  def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
    MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
      probeInvalidate -> getReleaseType(M_FLUSH, meta),
      probeCopy       -> getReleaseType(M_FLUSH, meta)))

  // Hits never change state (there is only one valid state).
  def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) = meta

  // Only M_FLUSH invalidates; other cache-control cmds leave the state alone.
  def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(Mux(cmd === M_FLUSH, clientInvalid, meta.state))(meta.p)

  // Built-in (uncached) grants do not install the block.
  def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(Mux(incoming.isBuiltInType(), clientInvalid, clientValid))(meta.p)

  def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
    ClientMetadata(Mux(incoming.p_type === probeInvalidate,
                       clientInvalid, meta.state))(meta.p)

  // Manager states and functions:
  val nManagerStates = 0 // We don't actually need any states for this protocol

  // Probe whenever the directory says any client holds the block.
  def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) = !dir.none(meta.sharers)

  def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)

  def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
    MuxLookup(cmd, probeCopy, Array(
      M_FLUSH -> probeInvalidate))

  // Built-in writes invalidate sharers; built-in reads only request a copy.
  // Any custom (cached) Acquire invalidates, since MI allows one owner.
  def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
    Mux(a.isBuiltInType(),
      MuxLookup(a.a_type, probeCopy, Array(
        Acquire.getBlockType -> probeCopy,
        Acquire.putBlockType -> probeInvalidate,
        Acquire.getType -> probeCopy,
        Acquire.putType -> probeInvalidate,
        Acquire.getPrefetchType -> probeCopy,
        Acquire.putPrefetchType -> probeInvalidate,
        Acquire.putAtomicType -> probeInvalidate)),
      probeInvalidate)

  def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
    Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type), grantExclusive)
  def getExclusiveGrantType(): UInt = grantExclusive

  // Drop src from the sharer list only on invalidating Releases.
  def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
    val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
    MuxCase(meta, Array(
      incoming.is(releaseInvalidateData) -> popped,
      incoming.is(releaseInvalidateAck) -> popped))
  }
}
|
||||
|
||||
/** A simple protocol with only three Client states.
  * Data is marked as dirty when written.
  * Only a single client may ever have a copy of a block at a time.
  */
class MEICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
  // Message types
  val nAcquireTypes = 2
  val nProbeTypes = 3
  val nReleaseTypes = 6
  val nGrantTypes = 1

  val acquireExclusiveClean :: acquireExclusiveDirty :: Nil = Enum(UInt(), nAcquireTypes)
  val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
  val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
  val grantExclusive :: Nil = Enum(UInt(), nGrantTypes)

  def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData)
  def grantTypesWithData = Seq(grantExclusive)

  // Client states and functions
  val nClientStates = 3
  val clientInvalid :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates)

  // Both exclusive states permit reads and writes; only the dirty one
  // actually holds modified data.
  def clientStatesWithReadPermission = Seq(clientExclusiveClean, clientExclusiveDirty)
  def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty)
  def clientStatesWithDirtyData = Seq(clientExclusiveDirty)

  def isValid (meta: ClientMetadata) = meta.state =/= clientInvalid

  // Request the dirty flavor when the access intends to write.
  def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
    Mux(isWriteIntent(cmd), acquireExclusiveDirty, acquireExclusiveClean)

  // Voluntary release type for a cache-control cmd; data is attached iff dirty.
  def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
    val dirty = meta.state isOneOf clientStatesWithDirtyData
    MuxLookup(cmd, releaseCopyAck, Array(
      M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
      M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
      M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
  }

  // Release type in response to a Probe: every probe type is handled as a
  // full flush since MEI has no shared state to downgrade into.
  def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
    MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
      probeInvalidate -> getReleaseType(M_FLUSH, meta),
      probeDowngrade -> getReleaseType(M_FLUSH, meta),
      probeCopy -> getReleaseType(M_FLUSH, meta)))

  // A write hit marks the block dirty; reads leave the state unchanged.
  def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p)

  // M_FLUSH invalidates; M_CLEAN washes dirty back to clean.
  def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(
      MuxLookup(cmd, meta.state, Array(
        M_FLUSH -> clientInvalid,
        M_CLEAN -> Mux(meta.state === clientExclusiveDirty, clientExclusiveClean, meta.state))))(meta.p)

  // Built-in (uncached) grants do not install the block; cached grants land
  // in dirty or clean exclusive depending on write intent.
  def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(
      Mux(incoming.isBuiltInType(), clientInvalid,
        Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean)))(meta.p)

  // Every probe type invalidates, matching the flush-style probe responses above.
  def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
    ClientMetadata(
      MuxLookup(incoming.p_type, meta.state, Array(
        probeInvalidate -> clientInvalid,
        probeDowngrade -> clientInvalid,
        probeCopy -> clientInvalid)))(meta.p)

  // Manager states and functions:
  val nManagerStates = 0 // We don't actually need any states for this protocol

  // Probe whenever the directory says any client holds the block.
  def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) = !dir.none(meta.sharers)
  def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)

  def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
    MuxLookup(cmd, probeCopy, Array(
      M_FLUSH -> probeInvalidate,
      M_PRODUCE -> probeDowngrade))

  // Built-in writes invalidate sharers; built-in reads only request a copy.
  // Any custom (cached) Acquire invalidates, since MEI allows one owner.
  def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
    Mux(a.isBuiltInType(),
      MuxLookup(a.a_type, probeCopy, Array(
        Acquire.getBlockType -> probeCopy,
        Acquire.putBlockType -> probeInvalidate,
        Acquire.getType -> probeCopy,
        Acquire.putType -> probeInvalidate,
        Acquire.getPrefetchType -> probeCopy,
        Acquire.putPrefetchType -> probeInvalidate,
        Acquire.putAtomicType -> probeInvalidate)),
      probeInvalidate)

  def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
    Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type), grantExclusive)
  def getExclusiveGrantType(): UInt = grantExclusive

  // Drop src from the sharer list only on invalidating Releases.
  def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
    val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
    MuxCase(meta, Array(
      incoming.is(releaseInvalidateData) -> popped,
      incoming.is(releaseInvalidateAck) -> popped))
  }
}
|
||||
|
||||
/** A protocol with only three Client states.
  * Data is always assumed to be dirty.
  * Multiple clients may share read permissions on a block at the same time.
  */
class MSICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
  // Message types
  val nAcquireTypes = 2
  val nProbeTypes = 3
  val nReleaseTypes = 6
  val nGrantTypes = 3

  val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes)
  val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
  val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
  val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes)

  def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData)
  def grantTypesWithData = Seq(grantShared, grantExclusive)

  // Client states and functions
  val nClientStates = 3
  val clientInvalid :: clientShared :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates)

  // Shared blocks are read-only; writes require the exclusive (dirty) state.
  def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveDirty)
  def clientStatesWithWritePermission = Seq(clientExclusiveDirty)
  def clientStatesWithDirtyData = Seq(clientExclusiveDirty)

  def isValid(meta: ClientMetadata): Bool = meta.state =/= clientInvalid

  // Write intent requires exclusive; plain reads only need shared.
  def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
    Mux(isWriteIntent(cmd), acquireExclusive, acquireShared)

  // Voluntary release type for a cache-control cmd; data is attached iff dirty.
  def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
    val dirty = meta.state isOneOf clientStatesWithDirtyData
    MuxLookup(cmd, releaseCopyAck, Array(
      M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
      M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
      M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
  }

  // Release type in response to a Probe: invalidation flushes, while
  // downgrade/copy probes only produce the data (drop to shared).
  def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
    MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
      probeInvalidate -> getReleaseType(M_FLUSH, meta),
      probeDowngrade -> getReleaseType(M_PRODUCE, meta),
      probeCopy -> getReleaseType(M_PRODUCE, meta)))

  // A write hit marks the block exclusive-dirty; reads leave the state alone.
  def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p)

  // M_FLUSH invalidates; M_PRODUCE downgrades a writable block to shared.
  def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(
      MuxLookup(cmd, meta.state, Array(
        M_FLUSH -> clientInvalid,
        M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission,
                         clientShared, meta.state))))(meta.p)

  // Built-in (uncached) grants do not install the block; cached grants map
  // directly from the grant type. Exclusive grants land dirty (data is
  // always assumed dirty in this protocol).
  def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(
      Mux(incoming.isBuiltInType(), clientInvalid,
        MuxLookup(incoming.g_type, clientInvalid, Array(
          grantShared -> clientShared,
          grantExclusive -> clientExclusiveDirty,
          grantExclusiveAck -> clientExclusiveDirty))))(meta.p)

  def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
    ClientMetadata(
      MuxLookup(incoming.p_type, meta.state, Array(
        probeInvalidate -> clientInvalid,
        probeDowngrade -> clientShared,
        probeCopy -> clientShared)))(meta.p)

  // Manager states and functions:
  val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing
                         //       only a single sharer (also would need
                         //       notification msg to track clean drops)
                         //       Also could avoid probes on outer WBs.

  // Skip probes when nobody holds the block; always probe a lone sharer
  // (assumed Exclusive); otherwise probe only for writes / exclusive requests.
  def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) =
    Mux(dir.none(meta.sharers), Bool(false),
      Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive
        Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared)))

  def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)

  def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
    MuxLookup(cmd, probeCopy, Array(
      M_FLUSH -> probeInvalidate,
      M_PRODUCE -> probeDowngrade))

  // Built-in writes invalidate sharers; built-in reads only request a copy.
  // Cached acquireShared downgrades sharers; acquireExclusive invalidates them.
  def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
    Mux(a.isBuiltInType(),
      MuxLookup(a.a_type, probeCopy, Array(
        Acquire.getBlockType -> probeCopy,
        Acquire.putBlockType -> probeInvalidate,
        Acquire.getType -> probeCopy,
        Acquire.putType -> probeInvalidate,
        Acquire.getPrefetchType -> probeCopy,
        Acquire.putPrefetchType -> probeInvalidate,
        Acquire.putAtomicType -> probeInvalidate)),
      MuxLookup(a.a_type, probeCopy, Array(
        acquireShared -> probeDowngrade,
        acquireExclusive -> probeInvalidate)))

  // A shared request may be upgraded to an exclusive grant when no other
  // client holds the block.
  def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
    Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type),
      Mux(a.a_type === acquireShared,
        Mux(!dir.none(meta.sharers), grantShared, grantExclusive),
        grantExclusive))
  def getExclusiveGrantType(): UInt = grantExclusive

  // Drop src from the sharer list only on invalidating Releases.
  def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
    val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
    MuxCase(meta, Array(
      incoming.is(releaseInvalidateData) -> popped,
      incoming.is(releaseInvalidateAck) -> popped))
  }
}
|
||||
|
||||
/** A protocol with four Client states.
  * Data is marked as dirty when written.
  * Multiple clients may share read permissions on a block at the same time.
  */
class MESICoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
  // Message types
  val nAcquireTypes = 2
  val nProbeTypes = 3
  val nReleaseTypes = 6
  val nGrantTypes = 3

  val acquireShared :: acquireExclusive :: Nil = Enum(UInt(), nAcquireTypes)
  val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(UInt(), nProbeTypes)
  val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: Nil = Enum(UInt(), nReleaseTypes)
  val grantShared :: grantExclusive :: grantExclusiveAck :: Nil = Enum(UInt(), nGrantTypes)

  def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData)
  def grantTypesWithData = Seq(grantShared, grantExclusive)

  // Client states and functions
  val nClientStates = 4
  val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: Nil = Enum(UInt(), nClientStates)

  // Shared blocks are read-only; either exclusive state may be written,
  // but only the dirty one holds modified data (unlike MSI).
  def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveClean, clientExclusiveDirty)
  def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty)
  def clientStatesWithDirtyData = Seq(clientExclusiveDirty)

  def isValid(meta: ClientMetadata): Bool = meta.state =/= clientInvalid

  // Write intent requires exclusive; plain reads only need shared.
  def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
    Mux(isWriteIntent(cmd), acquireExclusive, acquireShared)

  // Voluntary release type for a cache-control cmd; data is attached iff dirty.
  def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
    val dirty = meta.state isOneOf clientStatesWithDirtyData
    MuxLookup(cmd, releaseCopyAck, Array(
      M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
      M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
      M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
  }

  // Release type in response to a Probe: invalidation flushes, while
  // downgrade/copy probes only produce the data (drop to shared).
  def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt =
    MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
      probeInvalidate -> getReleaseType(M_FLUSH, meta),
      probeDowngrade -> getReleaseType(M_PRODUCE, meta),
      probeCopy -> getReleaseType(M_PRODUCE, meta)))

  // A write hit marks the block exclusive-dirty; reads leave the state alone.
  def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(Mux(isWrite(cmd), clientExclusiveDirty, meta.state))(meta.p)

  // M_FLUSH invalidates; M_PRODUCE downgrades a writable block to shared;
  // M_CLEAN washes dirty back to exclusive-clean.
  def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(
      MuxLookup(cmd, meta.state, Array(
        M_FLUSH -> clientInvalid,
        M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission,
                         clientShared, meta.state),
        M_CLEAN -> Mux(meta.state === clientExclusiveDirty,
                       clientExclusiveClean, meta.state))))(meta.p)

  // Built-in (uncached) grants do not install the block; exclusive grants
  // land dirty only when the triggering cmd was a write (MESI's E state).
  def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(
      Mux(incoming.isBuiltInType(), clientInvalid,
        MuxLookup(incoming.g_type, clientInvalid, Array(
          grantShared -> clientShared,
          grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean),
          grantExclusiveAck -> clientExclusiveDirty))))(meta.p)

  def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) =
    ClientMetadata(
      MuxLookup(incoming.p_type, meta.state, Array(
        probeInvalidate -> clientInvalid,
        probeDowngrade -> clientShared,
        probeCopy -> clientShared)))(meta.p)

  // Manager states and functions:
  val nManagerStates = 0 // TODO: We could add a Shared state to avoid probing
                         //       only a single sharer (also would need
                         //       notification msg to track clean drops)
                         //       Also could avoid probes on outer WBs.

  // Skip probes when nobody holds the block; always probe a lone sharer
  // (assumed Exclusive); otherwise probe only for writes / exclusive requests.
  def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) =
    Mux(dir.none(meta.sharers), Bool(false),
      Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive
        Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared)))

  def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)

  def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
    MuxLookup(cmd, probeCopy, Array(
      M_FLUSH -> probeInvalidate,
      M_PRODUCE -> probeDowngrade))

  // Built-in writes invalidate sharers; built-in reads only request a copy.
  // Cached acquireShared downgrades sharers; acquireExclusive invalidates them.
  def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
    Mux(a.isBuiltInType(),
      MuxLookup(a.a_type, probeCopy, Array(
        Acquire.getBlockType -> probeCopy,
        Acquire.putBlockType -> probeInvalidate,
        Acquire.getType -> probeCopy,
        Acquire.putType -> probeInvalidate,
        Acquire.getPrefetchType -> probeCopy,
        Acquire.putPrefetchType -> probeInvalidate,
        Acquire.putAtomicType -> probeInvalidate)),
      MuxLookup(a.a_type, probeCopy, Array(
        acquireShared -> probeDowngrade,
        acquireExclusive -> probeInvalidate)))

  // A shared request may be upgraded to an exclusive grant when no other
  // client holds the block (MESI's silent-E optimization at grant time).
  def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
    Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type),
      Mux(a.a_type === acquireShared,
        Mux(!dir.none(meta.sharers), grantShared, grantExclusive),
        grantExclusive))
  def getExclusiveGrantType(): UInt = grantExclusive

  // Drop src from the sharer list only on invalidating Releases.
  def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
    val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
    MuxCase(meta, Array(
      incoming.is(releaseInvalidateData) -> popped,
      incoming.is(releaseInvalidateAck) -> popped))
  }
}
|
||||
|
||||
/** A MESI-style protocol extended with a migratory-sharing optimization.
  * Extra client states (clientSharedByTwo, clientMigratoryClean/Dirty) and
  * extra message types let ownership of a block migrate between clients
  * without a round trip through the manager on each handoff.
  * NOTE(review): high-level intent inferred from the state/message names and
  * transitions below — confirm against the protocol's design documentation.
  */
class MigratoryCoherence(dir: DirectoryRepresentation) extends CoherencePolicy(dir) {
  // Message types
  val nAcquireTypes = 3
  val nProbeTypes = 4
  val nReleaseTypes = 10
  val nGrantTypes = 4

  val acquireShared :: acquireExclusive :: acquireInvalidateOthers :: Nil = Enum(UInt(), nAcquireTypes)
  val probeInvalidate :: probeDowngrade :: probeCopy :: probeInvalidateOthers :: Nil = Enum(UInt(), nProbeTypes)
  val releaseInvalidateData :: releaseDowngradeData :: releaseCopyData :: releaseInvalidateAck :: releaseDowngradeAck :: releaseCopyAck :: releaseDowngradeDataMigratory :: releaseDowngradeAckHasCopy :: releaseInvalidateDataMigratory :: releaseInvalidateAckMigratory :: Nil = Enum(UInt(), nReleaseTypes)
  val grantShared :: grantExclusive :: grantExclusiveAck :: grantReadMigratory :: Nil = Enum(UInt(), nGrantTypes)

  def releaseTypesWithData = Seq(releaseInvalidateData, releaseDowngradeData, releaseCopyData, releaseInvalidateDataMigratory, releaseDowngradeDataMigratory)
  def grantTypesWithData = Seq(grantShared, grantExclusive, grantReadMigratory)

  // Client states and functions
  val nClientStates = 7
  val clientInvalid :: clientShared :: clientExclusiveClean :: clientExclusiveDirty :: clientSharedByTwo :: clientMigratoryClean :: clientMigratoryDirty :: Nil = Enum(UInt(), nClientStates)

  // All non-invalid, non-plain-shared states can be written; only the two
  // dirty states hold modified data.
  def clientStatesWithReadPermission = Seq(clientShared, clientExclusiveClean, clientExclusiveDirty, clientSharedByTwo, clientMigratoryClean, clientMigratoryDirty)
  def clientStatesWithWritePermission = Seq(clientExclusiveClean, clientExclusiveDirty, clientMigratoryClean, clientMigratoryDirty)
  def clientStatesWithDirtyData = Seq(clientExclusiveDirty, clientMigratoryDirty)

  def isValid (meta: ClientMetadata): Bool = meta.state =/= clientInvalid

  // A write upgrade from an already-valid state uses acquireInvalidateOthers
  // (the data is already held locally); a write miss from invalid requests
  // exclusive; plain reads request shared.
  def getAcquireType(cmd: UInt, meta: ClientMetadata): UInt =
    Mux(isWriteIntent(cmd),
      Mux(meta.state === clientInvalid, acquireExclusive, acquireInvalidateOthers),
      acquireShared)

  // Voluntary release type for a cache-control cmd; data is attached iff dirty.
  def getReleaseType(cmd: UInt, meta: ClientMetadata): UInt = {
    val dirty = meta.state isOneOf clientStatesWithDirtyData
    MuxLookup(cmd, releaseCopyAck, Array(
      M_FLUSH -> Mux(dirty, releaseInvalidateData, releaseInvalidateAck),
      M_PRODUCE -> Mux(dirty, releaseDowngradeData, releaseDowngradeAck),
      M_CLEAN -> Mux(dirty, releaseCopyData, releaseCopyAck)))
  }

  // Release type in response to a Probe. The *Migratory / *HasCopy variants
  // carry extra state information back to the manager so migratory sharing
  // can be tracked.
  def getReleaseType(incoming: HasProbeType, meta: ClientMetadata): UInt = {
    val dirty = meta.state isOneOf clientStatesWithDirtyData
    val with_data = MuxLookup(incoming.p_type, releaseInvalidateData, Array(
      probeInvalidate -> Mux(meta.state isOneOf (clientExclusiveDirty, clientMigratoryDirty),
                             releaseInvalidateDataMigratory, releaseInvalidateData),
      probeDowngrade -> Mux(meta.state === clientMigratoryDirty,
                            releaseDowngradeDataMigratory, releaseDowngradeData),
      probeCopy -> releaseCopyData))
    val without_data = MuxLookup(incoming.p_type, releaseInvalidateAck, Array(
      probeInvalidate -> Mux(clientExclusiveClean === meta.state,
                             releaseInvalidateAckMigratory, releaseInvalidateAck),
      probeInvalidateOthers -> Mux(clientSharedByTwo === meta.state,
                                   releaseInvalidateAckMigratory, releaseInvalidateAck),
      probeDowngrade -> Mux(meta.state =/= clientInvalid,
                            releaseDowngradeAckHasCopy, releaseDowngradeAck),
      probeCopy -> Mux(meta.state =/= clientInvalid,
                       releaseDowngradeAckHasCopy, releaseDowngradeAck)))
    Mux(dirty, with_data, without_data)
  }

  // A write hit dirties whichever clean state the block is in (exclusive or
  // migratory); reads leave the state unchanged.
  def clientMetadataOnHit(cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(
      Mux(isWrite(cmd), MuxLookup(meta.state, clientExclusiveDirty, Array(
                          clientExclusiveClean -> clientExclusiveDirty,
                          clientMigratoryClean -> clientMigratoryDirty)),
          meta.state))(meta.p)

  // M_FLUSH invalidates; M_PRODUCE downgrades writable states to shared;
  // M_CLEAN washes each dirty state back to its clean counterpart.
  def clientMetadataOnCacheControl(cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(
      MuxLookup(cmd, meta.state, Array(
        M_FLUSH -> clientInvalid,
        M_PRODUCE -> Mux(meta.state isOneOf clientStatesWithWritePermission,
                         clientShared, meta.state),
        M_CLEAN -> MuxLookup(meta.state, meta.state, Array(
                     clientExclusiveDirty -> clientExclusiveClean,
                     clientMigratoryDirty -> clientMigratoryClean)))))(meta.p)

  // Built-in (uncached) grants do not install the block; grantReadMigratory
  // installs directly into a migratory state.
  def clientMetadataOnGrant(incoming: HasGrantType, cmd: UInt, meta: ClientMetadata) =
    ClientMetadata(
      Mux(incoming.isBuiltInType(), clientInvalid,
        MuxLookup(incoming.g_type, clientInvalid, Array(
          grantShared -> clientShared,
          grantExclusive -> Mux(isWrite(cmd), clientExclusiveDirty, clientExclusiveClean),
          grantExclusiveAck -> clientExclusiveDirty,
          grantReadMigratory -> Mux(isWrite(cmd),
                                    clientMigratoryDirty, clientMigratoryClean)))))(meta.p)

  // Downgrades step through the migratory ladder (exclusive/migratory ->
  // sharedByTwo -> shared); a dirty migratory block drops to invalid instead.
  def clientMetadataOnProbe(incoming: HasProbeType, meta: ClientMetadata) = {
    val downgradeState = MuxLookup(meta.state, clientShared, Array(
      clientExclusiveClean -> clientSharedByTwo,
      clientExclusiveDirty -> clientSharedByTwo,
      clientSharedByTwo -> clientShared,
      clientMigratoryClean -> clientSharedByTwo,
      clientMigratoryDirty -> clientInvalid))
    ClientMetadata(
      MuxLookup(incoming.p_type, meta.state, Array(
        probeInvalidate -> clientInvalid,
        probeInvalidateOthers -> clientInvalid,
        probeDowngrade -> downgradeState,
        probeCopy -> downgradeState)))(meta.p)
  }

  // Manager states and functions:
  val nManagerStates = 0 // TODO: we could add some states to reduce the number of message types

  // Skip probes when nobody holds the block; always probe a lone sharer
  // (assumed Exclusive); otherwise probe only for writes / exclusive requests.
  def requiresProbes(a: HasAcquireType, meta: ManagerMetadata) =
    Mux(dir.none(meta.sharers), Bool(false),
      Mux(dir.one(meta.sharers), Bool(true), //TODO: for now we assume it's Exclusive
        Mux(a.isBuiltInType(), a.hasData(), a.a_type =/= acquireShared)))

  def requiresProbes(cmd: UInt, meta: ManagerMetadata) = !dir.none(meta.sharers)

  def getProbeType(cmd: UInt, meta: ManagerMetadata): UInt =
    MuxLookup(cmd, probeCopy, Array(
      M_FLUSH -> probeInvalidate,
      M_PRODUCE -> probeDowngrade))

  // Built-in writes invalidate sharers; built-in reads only request a copy.
  // acquireInvalidateOthers uses the dedicated probeInvalidateOthers type so
  // the requester (which already holds data) is handled distinctly.
  def getProbeType(a: HasAcquireType, meta: ManagerMetadata): UInt =
    Mux(a.isBuiltInType(),
      MuxLookup(a.a_type, probeCopy, Array(
        Acquire.getBlockType -> probeCopy,
        Acquire.putBlockType -> probeInvalidate,
        Acquire.getType -> probeCopy,
        Acquire.putType -> probeInvalidate,
        Acquire.getPrefetchType -> probeCopy,
        Acquire.putPrefetchType -> probeInvalidate,
        Acquire.putAtomicType -> probeInvalidate)),
      MuxLookup(a.a_type, probeCopy, Array(
        acquireShared -> probeDowngrade,
        acquireExclusive -> probeInvalidate,
        acquireInvalidateOthers -> probeInvalidateOthers)))

  // A shared request may be upgraded to an exclusive grant when no other
  // client holds the block; invalidate-others gets a data-less ack since the
  // requester already has the data.
  def getGrantType(a: HasAcquireType, meta: ManagerMetadata): UInt =
    Mux(a.isBuiltInType(), Acquire.getBuiltInGrantType(a.a_type),
      MuxLookup(a.a_type, grantShared, Array(
        acquireShared -> Mux(!dir.none(meta.sharers), grantShared, grantExclusive),
        acquireExclusive -> grantExclusive,
        acquireInvalidateOthers -> grantExclusiveAck)))  //TODO: add this to MESI for broadcast?
  def getExclusiveGrantType(): UInt = grantExclusive

  // Drop src from the sharer list on any invalidating Release, including the
  // migratory variants.
  def managerMetadataOnRelease(incoming: HasReleaseType, src: UInt, meta: ManagerMetadata) = {
    val popped = ManagerMetadata(sharers=dir.pop(meta.sharers, src))(meta.p)
    MuxCase(meta, Array(
      incoming.is(releaseInvalidateData) -> popped,
      incoming.is(releaseInvalidateAck) -> popped,
      incoming.is(releaseInvalidateDataMigratory) -> popped,
      incoming.is(releaseInvalidateAckMigratory) -> popped))
  }
}
|
||||
424
src/main/scala/uncore/converters/Ahb.scala
Normal file
424
src/main/scala/uncore/converters/Ahb.scala
Normal file
@@ -0,0 +1,424 @@
|
||||
package uncore.converters
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import uncore.util._
|
||||
import uncore.constants._
|
||||
import cde.{Parameters, Field}
|
||||
import HastiConstants._
|
||||
|
||||
/* We need to translate TileLink requests into operations we can actually execute on AHB.
 * The general plan of attack is:
 *   get         => one AHB=>TL read
 *   put         => [multiple AHB write fragments=>nil], one AHB write=>TL
 *   getBlock    => AHB burst reads=>TL
 *   putBlock    => AHB burst writes=>TL
 *   getPrefetch => noop=>TL
 *   putPrefetch => noop=>TL
 *   putAtomic   => one AHB=>TL read, one idle, one AHB atom_write=>nil, one idle
 *
 * This requires that we support a pipeline of optional AHB requests with optional TL responses.
 */
|
||||
/** Internal pipeline request bundle: an AHB master interface plus the
  * TileLink bookkeeping (grant type, transaction id, beat id) needed to form
  * a TL response later in the pipeline.
  */
class AHBRequestIO(implicit p: Parameters) extends HastiMasterIO
    with HasGrantType
    with HasClientTransactionId
    with HasTileLinkBeatId {
  val executeAHB = Bool() // presumably: this slot carries a real AHB transfer — confirm in AHBTileLinkIn/Out
  val respondTL  = Bool() // presumably: this slot should produce a TileLink response — confirm downstream
  val latchAtom  = Bool() // NOTE(review): looks tied to the putAtomic read/modify sequence — verify usage
  val firstBurst = Bool() // marks the first beat of a burst
  val finalBurst = Bool() // marks the last beat of a burst
  val cmd = Bits(width = M_SZ) // atomic op
}
|
||||
|
||||
// AHB stage1: translate TileLink Acquires into AHBRequests
|
||||
class AHBTileLinkIn(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module
|
||||
with HasHastiParameters
|
||||
with HasTileLinkParameters
|
||||
with HasAddrMapParameters {
|
||||
val io = new Bundle {
|
||||
val acquire = new DecoupledIO(new Acquire).flip // NOTE: acquire must be either a Queue or a Pipe
|
||||
val request = new DecoupledIO(new AHBRequestIO)
|
||||
}
|
||||
|
||||
// Match the AHB burst with a TileLink {Put,Get}Block
|
||||
val burstSize = tlDataBeats match {
|
||||
case 1 => HBURST_SINGLE
|
||||
// case 2 not supported by AHB
|
||||
case 4 => HBURST_WRAP4
|
||||
case 8 => HBURST_WRAP8
|
||||
case 16 => HBURST_WRAP16
|
||||
case _ => throw new java.lang.AssertionError("TileLink beats unsupported by AHB")
|
||||
}
|
||||
|
||||
// Bursts start at 0 and wrap-around back to 0
|
||||
val finalBurst = UInt(tlDataBeats-1, width = log2Up(tlDataBeats)).asUInt
|
||||
val firstBurst = UInt(0, width = log2Up(tlDataBeats))
|
||||
val next_wmask = Wire(UInt(width = tlDataBytes)) // calculated below
|
||||
|
||||
// State variables for processing more complicated TileLink Acquires
|
||||
val s_atom_r :: s_atom_idle1 :: s_atom_w :: s_atom_idle2 :: Nil = Enum(UInt(), 4)
|
||||
val atom_state = Reg(init = s_atom_r) // never changes if !supportAtomics
|
||||
val done_wmask = Reg(init = UInt(0, width = tlDataBytes))
|
||||
val burst = Reg(init = firstBurst)
|
||||
|
||||
// Grab some view of the TileLink acquire
|
||||
val acq_wmask = io.acquire.bits.wmask()
|
||||
val isReadBurst = io.acquire.bits.is(Acquire.getBlockType)
|
||||
val isWriteBurst = io.acquire.bits.is(Acquire.putBlockType)
|
||||
val isBurst = isWriteBurst || isReadBurst
|
||||
val isAtomic = io.acquire.bits.is(Acquire.putAtomicType) && Bool(supportAtomics)
|
||||
val isPut = io.acquire.bits.is(Acquire.putType)
|
||||
|
||||
// Final states?
|
||||
val last_wmask = next_wmask === acq_wmask
|
||||
val last_atom = atom_state === s_atom_idle2
|
||||
val last_burst = burst === finalBurst
|
||||
|
||||
// Block the incoming request until we've fully consumed it
|
||||
// NOTE: the outgoing grant.valid may happen while acquire.ready is still false;
|
||||
// for this reason it is essential to have a Queue or a Pipe infront of acquire
|
||||
io.acquire.ready := io.request.ready && MuxLookup(io.acquire.bits.a_type, Bool(true), Array(
|
||||
Acquire.getType -> Bool(true),
|
||||
Acquire.getBlockType -> last_burst, // hold it until the last beat is burst
|
||||
Acquire.putType -> last_wmask, // only accept the put if we can fully consume its wmask
|
||||
Acquire.putBlockType -> Bool(true),
|
||||
Acquire.putAtomicType -> last_atom, // atomic operation stages complete
|
||||
Acquire.getPrefetchType -> Bool(true),
|
||||
Acquire.putPrefetchType -> Bool(true)))
|
||||
|
||||
// Advance the fragment state
|
||||
when (io.request.ready && io.acquire.valid && isPut) {
|
||||
when (last_wmask) { // if this was the last fragment, restart FSM
|
||||
done_wmask := UInt(0)
|
||||
} .otherwise {
|
||||
done_wmask := next_wmask
|
||||
}
|
||||
}
|
||||
|
||||
// Advance the burst state
|
||||
// We assume here that TileLink gives us all putBlock beats with nothing between them
|
||||
when (io.request.ready && io.acquire.valid && isBurst) {
|
||||
when (last_burst) {
|
||||
burst := UInt(0)
|
||||
} .otherwise {
|
||||
burst := burst + UInt(1)
|
||||
}
|
||||
}
|
||||
|
||||
// Advance the atomic state machine
|
||||
when (io.request.ready && io.acquire.valid && isAtomic) {
|
||||
switch (atom_state) {
|
||||
is (s_atom_r) { atom_state := s_atom_idle1 }
|
||||
is (s_atom_idle1) { atom_state := s_atom_w } // idle1 => AMOALU runs on a different clock than AHB slave read
|
||||
is (s_atom_w) { atom_state := s_atom_idle2 }
|
||||
is (s_atom_idle2) { atom_state := s_atom_r } // idle2 state is required by AHB after hmastlock is lowered
|
||||
}
|
||||
}
|
||||
|
||||
// Returns (range=0, range=-1, aligned_wmask, size)
|
||||
def mask_helper(in_0 : Bool, range : UInt): (Bool, Bool, UInt, UInt) = {
|
||||
val len = range.getWidth
|
||||
if (len == 1) {
|
||||
(range === UInt(0), range === UInt(1), in_0.asUInt() & range, UInt(0))
|
||||
} else {
|
||||
val mid = len / 2
|
||||
val lo = range(mid-1, 0)
|
||||
val hi = range(len-1, mid)
|
||||
val (lo_0, lo_1, lo_m, lo_s) = mask_helper(in_0, lo)
|
||||
val (hi_0, hi_1, hi_m, hi_s) = mask_helper(in_0 && lo_0, hi)
|
||||
val out_0 = lo_0 && hi_0
|
||||
val out_1 = lo_1 && hi_1
|
||||
val out_m = Cat(hi_m, lo_m) | Fill(len, (in_0 && out_1).asUInt())
|
||||
val out_s = Mux(out_1, UInt(log2Up(len)), Mux(lo_0, hi_s, lo_s))
|
||||
(out_0, out_1, out_m, out_s)
|
||||
}
|
||||
}
|
||||
|
||||
val pending_wmask = acq_wmask & ~done_wmask
|
||||
val put_addr = PriorityEncoder(pending_wmask)
|
||||
val (wmask_0, _, exec_wmask, put_size) = mask_helper(Bool(true), pending_wmask)
|
||||
next_wmask := done_wmask | exec_wmask
|
||||
|
||||
// Calculate the address, with consideration to put fragments and bursts
|
||||
val addr_block = io.acquire.bits.addr_block
|
||||
val addr_beatin= io.acquire.bits.addr_beat
|
||||
val addr_burst = Mux(isReadBurst, addr_beatin + burst, addr_beatin)
|
||||
val addr_byte = Mux(isPut, put_addr, io.acquire.bits.addr_byte())
|
||||
val addr_beat = Mux(isWriteBurst, UInt(0), addr_burst)
|
||||
val ahbAddr = Cat(addr_block, addr_burst, addr_byte)
|
||||
val ahbSize = Mux(isPut, put_size, Mux(isBurst, UInt(log2Ceil(tlDataBytes)), io.acquire.bits.op_size()))
|
||||
|
||||
val ahbBurst = MuxLookup(io.acquire.bits.a_type, HBURST_SINGLE, Array(
|
||||
Acquire.getType -> HBURST_SINGLE,
|
||||
Acquire.getBlockType -> burstSize,
|
||||
Acquire.putType -> HBURST_SINGLE,
|
||||
Acquire.putBlockType -> burstSize,
|
||||
Acquire.putAtomicType -> HBURST_SINGLE,
|
||||
Acquire.getPrefetchType -> HBURST_SINGLE,
|
||||
Acquire.putPrefetchType -> HBURST_SINGLE))
|
||||
|
||||
val ahbWrite = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
|
||||
Acquire.getType -> Bool(false),
|
||||
Acquire.getBlockType -> Bool(false),
|
||||
Acquire.putType -> Bool(true),
|
||||
Acquire.putBlockType -> Bool(true),
|
||||
Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
|
||||
s_atom_r -> Bool(false),
|
||||
s_atom_idle1 -> Bool(false), // don't care
|
||||
s_atom_w -> Bool(true),
|
||||
s_atom_idle2 -> Bool(true))), // don't care
|
||||
Acquire.getPrefetchType -> Bool(false), // don't care
|
||||
Acquire.putPrefetchType -> Bool(true))) // don't care
|
||||
|
||||
val ahbExecute = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
|
||||
Acquire.getType -> Bool(true),
|
||||
Acquire.getBlockType -> Bool(true),
|
||||
Acquire.putType -> !wmask_0, // handle the case of a Put with no bytes!
|
||||
Acquire.putBlockType -> Bool(true),
|
||||
Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
|
||||
s_atom_r -> Bool(true),
|
||||
s_atom_idle1 -> Bool(false),
|
||||
s_atom_w -> Bool(true),
|
||||
s_atom_idle2 -> Bool(false))),
|
||||
Acquire.getPrefetchType -> Bool(false),
|
||||
Acquire.putPrefetchType -> Bool(false)))
|
||||
|
||||
val respondTL = MuxLookup(io.acquire.bits.a_type, Bool(false), Array(
|
||||
Acquire.getType -> Bool(true),
|
||||
Acquire.getBlockType -> Bool(true),
|
||||
Acquire.putType -> last_wmask,
|
||||
Acquire.putBlockType -> last_burst,
|
||||
Acquire.putAtomicType -> MuxLookup(atom_state, Bool(false), Array(
|
||||
s_atom_r -> Bool(true), // they want the old data
|
||||
s_atom_idle1 -> Bool(false),
|
||||
s_atom_w -> Bool(false),
|
||||
s_atom_idle2 -> Bool(false))),
|
||||
Acquire.getPrefetchType -> Bool(true),
|
||||
Acquire.putPrefetchType -> Bool(true)))
|
||||
|
||||
io.request.valid := io.acquire.valid
|
||||
io.request.bits.htrans := HTRANS_IDLE // unused/ignored
|
||||
io.request.bits.haddr := ahbAddr
|
||||
io.request.bits.hmastlock := isAtomic && atom_state =/= s_atom_idle2
|
||||
io.request.bits.hwrite := ahbWrite
|
||||
io.request.bits.hburst := ahbBurst
|
||||
io.request.bits.hsize := ahbSize
|
||||
io.request.bits.hprot := HPROT_DATA | HPROT_PRIVILEGED
|
||||
io.request.bits.hwdata := io.acquire.bits.data
|
||||
io.request.bits.executeAHB := ahbExecute
|
||||
io.request.bits.respondTL := respondTL
|
||||
io.request.bits.latchAtom := isAtomic && atom_state === s_atom_r
|
||||
io.request.bits.firstBurst := burst === firstBurst
|
||||
io.request.bits.finalBurst := burst === finalBurst || !isBurst
|
||||
io.request.bits.cmd := io.acquire.bits.op_code()
|
||||
io.request.bits.is_builtin_type := Bool(true)
|
||||
io.request.bits.g_type := io.acquire.bits.getBuiltInGrantType()
|
||||
io.request.bits.client_xact_id := io.acquire.bits.client_xact_id
|
||||
io.request.bits.addr_beat := addr_beat
|
||||
|
||||
val debugBurst = Reg(UInt())
|
||||
when (io.request.valid) {
|
||||
debugBurst := addr_burst - burst
|
||||
}
|
||||
|
||||
// We only support built-in TileLink requests
|
||||
assert(!io.acquire.valid || io.acquire.bits.is_builtin_type, "AHB bridge only supports builtin TileLink types")
|
||||
// Ensure alignment of address to size
|
||||
assert(!io.acquire.valid || (ahbAddr & ((UInt(1) << ahbSize) - UInt(1))) === UInt(0), "TileLink operation misaligned")
|
||||
// If this is a putBlock, make sure it moves properly
|
||||
assert(!io.acquire.valid || !isBurst || burst === firstBurst || debugBurst === addr_burst - burst, "TileLink putBlock beats not sequential")
|
||||
// We better not get an incomplete TileLink acquire
|
||||
assert(!io.acquire.valid || isBurst || burst === firstBurst, "TileLink never completed a putBlock")
|
||||
// If we disabled atomic support, we better not see a request
|
||||
assert(!io.acquire.bits.is(Acquire.putAtomicType) || Bool(supportAtomics))
|
||||
}
|
||||
|
||||
// AHB stage2: execute AHBRequests
|
||||
class AHBBusMaster(supportAtomics: Boolean = false)(implicit val p: Parameters) extends Module
|
||||
with HasHastiParameters
|
||||
with HasTileLinkParameters
|
||||
with HasAddrMapParameters {
|
||||
val io = new Bundle {
|
||||
val request = new DecoupledIO(new AHBRequestIO).flip
|
||||
val grant = new DecoupledIO(new Grant)
|
||||
val ahb = new HastiMasterIO()
|
||||
}
|
||||
|
||||
// All AHB outputs are registered (they might be IOs)
|
||||
val midBurst = Reg(init = Bool(false))
|
||||
val htrans = Reg(init = HTRANS_IDLE)
|
||||
val haddr = Reg(UInt())
|
||||
val hmastlock = Reg(init = Bool(false))
|
||||
val hwrite = Reg(Bool())
|
||||
val hburst = Reg(UInt())
|
||||
val hsize = Reg(init = UInt(0, width = SZ_HSIZE))
|
||||
val hprot = Reg(UInt())
|
||||
val hwdata0 = Reg(Bits())
|
||||
val hwdata1 = Reg(Bits())
|
||||
val hrdata = Reg(Bits())
|
||||
|
||||
io.ahb.htrans := htrans
|
||||
io.ahb.haddr := haddr
|
||||
io.ahb.hmastlock := hmastlock
|
||||
io.ahb.hwrite := hwrite
|
||||
io.ahb.hburst := hburst
|
||||
io.ahb.hsize := hsize
|
||||
io.ahb.hprot := hprot
|
||||
io.ahb.hwdata := hwdata1 // one cycle after the address phase
|
||||
|
||||
// TileLink response data needed in data phase
|
||||
val respondTL0 = Reg(init = Bool(false))
|
||||
val respondTL1 = Reg(init = Bool(false))
|
||||
val latchAtom0 = Reg(init = Bool(false))
|
||||
val latchAtom1 = Reg(init = Bool(false))
|
||||
val executeAHB0 = Reg(init = Bool(false))
|
||||
val executeAHB1 = Reg(init = Bool(false))
|
||||
val bubble = Reg(init = Bool(true)) // nothing useful in address phase
|
||||
val cmd = Reg(Bits())
|
||||
val g_type0 = Reg(UInt())
|
||||
val g_type1 = Reg(UInt())
|
||||
val client_xact_id0 = Reg(Bits())
|
||||
val client_xact_id1 = Reg(Bits())
|
||||
val addr_beat0 = Reg(UInt())
|
||||
val addr_beat1 = Reg(UInt())
|
||||
val grant1 = Reg(new Grant)
|
||||
|
||||
// It is allowed to progress from Idle/Busy during a wait state
|
||||
val addrReady = io.ahb.hready || bubble || (!executeAHB1 && !executeAHB0)
|
||||
val dataReady = io.ahb.hready || !executeAHB1
|
||||
|
||||
// Only accept a new AHBRequest if we have enough buffer space in the pad
|
||||
// to accomodate a persistent drop in TileLink's grant.ready
|
||||
io.request.ready := addrReady && io.grant.ready
|
||||
|
||||
// htrans must be updated even if no request is valid
|
||||
when (addrReady) {
|
||||
when (io.request.fire() && io.request.bits.executeAHB) {
|
||||
midBurst := !io.request.bits.finalBurst
|
||||
when (io.request.bits.firstBurst) {
|
||||
htrans := HTRANS_NONSEQ
|
||||
} .otherwise {
|
||||
htrans := HTRANS_SEQ
|
||||
}
|
||||
} .otherwise {
|
||||
when (midBurst) {
|
||||
htrans := HTRANS_BUSY
|
||||
} .otherwise {
|
||||
htrans := HTRANS_IDLE
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Address phase, clear repondTL when we have nothing to do
|
||||
when (addrReady) {
|
||||
when (io.request.fire()) {
|
||||
respondTL0 := io.request.bits.respondTL
|
||||
latchAtom0 := io.request.bits.latchAtom
|
||||
executeAHB0:= io.request.bits.executeAHB
|
||||
bubble := Bool(false)
|
||||
} .otherwise {
|
||||
respondTL0 := Bool(false)
|
||||
latchAtom0 := Bool(false)
|
||||
executeAHB0:= Bool(false)
|
||||
bubble := Bool(true) // an atom-injected Idle is not a bubble!
|
||||
}
|
||||
}
|
||||
|
||||
// Transfer bulk address phase
|
||||
when (io.request.fire()) {
|
||||
haddr := io.request.bits.haddr
|
||||
hmastlock := io.request.bits.hmastlock
|
||||
hwrite := io.request.bits.hwrite
|
||||
hburst := io.request.bits.hburst
|
||||
hsize := io.request.bits.hsize
|
||||
hprot := io.request.bits.hprot
|
||||
hwdata0 := io.request.bits.hwdata
|
||||
cmd := io.request.bits.cmd
|
||||
g_type0 := io.request.bits.g_type
|
||||
client_xact_id0 := io.request.bits.client_xact_id
|
||||
addr_beat0 := io.request.bits.addr_beat
|
||||
}
|
||||
|
||||
// Execute Atomic ops; unused and optimized away if !supportAtomics
|
||||
val amo_p = p.alterPartial({
|
||||
case CacheBlockOffsetBits => hastiAddrBits
|
||||
})
|
||||
val alu = Module(new AMOALU(hastiDataBits, rhsIsAligned = true)(amo_p))
|
||||
alu.io.addr := haddr
|
||||
alu.io.cmd := cmd
|
||||
alu.io.typ := hsize
|
||||
alu.io.rhs := hwdata0
|
||||
alu.io.lhs := hrdata
|
||||
|
||||
// Transfer bulk data phase
|
||||
when (dataReady) {
|
||||
when (addrReady) {
|
||||
respondTL1 := respondTL0
|
||||
latchAtom1 := latchAtom0
|
||||
executeAHB1 := executeAHB0
|
||||
} .otherwise {
|
||||
respondTL1 := Bool(false)
|
||||
latchAtom1 := Bool(false)
|
||||
executeAHB1 := Bool(false)
|
||||
}
|
||||
hwdata1 := Mux(Bool(supportAtomics), alu.io.out, hwdata0)
|
||||
g_type1 := g_type0
|
||||
client_xact_id1 := client_xact_id0
|
||||
addr_beat1 := addr_beat0
|
||||
}
|
||||
|
||||
// Latch the read result for an atomic operation
|
||||
when (dataReady && latchAtom1) {
|
||||
hrdata := io.ahb.hrdata
|
||||
}
|
||||
|
||||
// Only issue TL grant when the slave has provided data
|
||||
io.grant.valid := dataReady && respondTL1
|
||||
io.grant.bits := Grant(
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = g_type1,
|
||||
client_xact_id = client_xact_id1,
|
||||
manager_xact_id = UInt(0),
|
||||
addr_beat = addr_beat1,
|
||||
data = io.ahb.hrdata)
|
||||
|
||||
// We cannot support errors from AHB to TileLink
|
||||
assert(!io.ahb.hresp, "AHB hresp error detected and cannot be reported via TileLink")
|
||||
}
|
||||
|
||||
class AHBBridge(supportAtomics: Boolean = true)(implicit val p: Parameters) extends Module
|
||||
with HasHastiParameters
|
||||
with HasTileLinkParameters
|
||||
with HasAddrMapParameters {
|
||||
val io = new Bundle {
|
||||
val tl = new ClientUncachedTileLinkIO().flip
|
||||
val ahb = new HastiMasterIO()
|
||||
}
|
||||
|
||||
// Hasti and TileLink widths must agree at this point in the topology
|
||||
require (tlDataBits == hastiDataBits)
|
||||
require (p(PAddrBits) == hastiAddrBits)
|
||||
|
||||
// AHB does not permit bursts to cross a 1KB boundary
|
||||
require (tlDataBits * tlDataBeats <= 1024*8)
|
||||
// tlDataBytes must be a power of 2
|
||||
require (1 << log2Ceil(tlDataBytes) == tlDataBytes)
|
||||
|
||||
// Create the sub-blocks
|
||||
val fsm = Module(new AHBTileLinkIn(supportAtomics))
|
||||
val bus = Module(new AHBBusMaster(supportAtomics))
|
||||
val pad = Module(new Queue(new Grant, 4))
|
||||
|
||||
fsm.io.acquire <> Queue(io.tl.acquire, 2) // Pipe is also acceptable
|
||||
bus.io.request <> fsm.io.request
|
||||
io.ahb <> bus.io.ahb
|
||||
io.tl.grant <> pad.io.deq
|
||||
|
||||
// The pad is needed to absorb AHB progress while !grant.ready
|
||||
// We are only 'ready' if the pad has at least 3 cycles of space
|
||||
bus.io.grant.ready := pad.io.count <= UInt(1)
|
||||
pad.io.enq.bits := bus.io.grant.bits
|
||||
pad.io.enq.valid := bus.io.grant.valid
|
||||
}
|
||||
383
src/main/scala/uncore/converters/Nasti.scala
Normal file
383
src/main/scala/uncore/converters/Nasti.scala
Normal file
@@ -0,0 +1,383 @@
|
||||
package uncore.converters
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import cde.Parameters
|
||||
import scala.math.min
|
||||
|
||||
class IdMapper(val inIdBits: Int, val outIdBits: Int,
|
||||
val forceMapping: Boolean = false)
|
||||
(implicit val p: Parameters) extends Module {
|
||||
|
||||
val io = new Bundle {
|
||||
val req = new Bundle {
|
||||
val valid = Bool(INPUT)
|
||||
val ready = Bool(OUTPUT)
|
||||
val in_id = UInt(INPUT, inIdBits)
|
||||
val out_id = UInt(OUTPUT, outIdBits)
|
||||
}
|
||||
val resp = new Bundle {
|
||||
val valid = Bool(INPUT)
|
||||
val matches = Bool(OUTPUT)
|
||||
val out_id = UInt(INPUT, outIdBits)
|
||||
val in_id = UInt(OUTPUT, inIdBits)
|
||||
}
|
||||
}
|
||||
val maxInXacts = 1 << inIdBits
|
||||
|
||||
if (inIdBits <= outIdBits && !forceMapping) {
|
||||
io.req.ready := Bool(true)
|
||||
io.req.out_id := io.req.in_id
|
||||
io.resp.matches := Bool(true)
|
||||
io.resp.in_id := io.resp.out_id
|
||||
} else {
|
||||
val nInXacts = 1 << inIdBits
|
||||
// No point in allowing more out xacts than in xacts
|
||||
val nOutXacts = min(1 << outIdBits, nInXacts)
|
||||
|
||||
val out_id_free = Reg(init = Vec.fill(nOutXacts){Bool(true)})
|
||||
val in_id_free = Reg(init = Vec.fill(nInXacts){Bool(true)})
|
||||
val next_out_id = PriorityEncoder(out_id_free)
|
||||
val id_mapping = Reg(Vec(nOutXacts, UInt(0, inIdBits)))
|
||||
|
||||
val req_fire = io.req.valid && io.req.ready
|
||||
when (req_fire) {
|
||||
out_id_free(io.req.out_id) := Bool(false)
|
||||
in_id_free(io.req.in_id) := Bool(false)
|
||||
id_mapping(io.req.out_id) := io.req.in_id
|
||||
}
|
||||
when (io.resp.valid) {
|
||||
out_id_free(io.resp.out_id) := Bool(true)
|
||||
in_id_free(io.resp.in_id) := Bool(true)
|
||||
}
|
||||
|
||||
io.req.ready := out_id_free.reduce(_ || _) && in_id_free(io.req.in_id)
|
||||
io.req.out_id := next_out_id
|
||||
|
||||
io.resp.in_id := id_mapping(io.resp.out_id)
|
||||
io.resp.matches := !out_id_free(io.resp.out_id)
|
||||
}
|
||||
}
|
||||
|
||||
class NastiIOTileLinkIOConverterInfo(implicit p: Parameters) extends TLBundle()(p) {
|
||||
val addr_beat = UInt(width = tlBeatAddrBits)
|
||||
val subblock = Bool()
|
||||
}
|
||||
|
||||
class NastiIOTileLinkIOConverter(implicit p: Parameters) extends TLModule()(p)
|
||||
with HasNastiParameters {
|
||||
val io = new Bundle {
|
||||
val tl = new ClientUncachedTileLinkIO().flip
|
||||
val nasti = new NastiIO
|
||||
}
|
||||
|
||||
val dataBits = tlDataBits*tlDataBeats
|
||||
require(tlDataBits == nastiXDataBits, "Data sizes between LLC and MC don't agree") // TODO: remove this restriction
|
||||
require(tlDataBeats < (1 << nastiXLenBits), "Can't have that many beats")
|
||||
|
||||
val has_data = io.tl.acquire.bits.hasData()
|
||||
|
||||
val is_subblock = io.tl.acquire.bits.isSubBlockType()
|
||||
val is_multibeat = io.tl.acquire.bits.hasMultibeatData()
|
||||
val (tl_cnt_out, tl_wrap_out) = Counter(
|
||||
io.tl.acquire.fire() && is_multibeat, tlDataBeats)
|
||||
|
||||
val get_valid = io.tl.acquire.valid && !has_data
|
||||
val put_valid = io.tl.acquire.valid && has_data
|
||||
|
||||
// Reorder queue saves extra information needed to send correct
|
||||
// grant back to TL client
|
||||
val roqIdBits = min(tlClientXactIdBits, nastiXIdBits)
|
||||
val roq = Module(new ReorderQueue(
|
||||
new NastiIOTileLinkIOConverterInfo, roqIdBits))
|
||||
|
||||
val get_id_mapper = Module(new IdMapper(tlClientXactIdBits, nastiXIdBits))
|
||||
val put_id_mapper = Module(new IdMapper(tlClientXactIdBits, nastiXIdBits))
|
||||
|
||||
val get_id_ready = get_id_mapper.io.req.ready
|
||||
val put_id_mask = is_subblock || io.tl.acquire.bits.addr_beat === UInt(0)
|
||||
val put_id_ready = put_id_mapper.io.req.ready || !put_id_mask
|
||||
|
||||
// For Get/GetBlock, make sure Reorder queue can accept new entry
|
||||
val get_helper = DecoupledHelper(
|
||||
get_valid,
|
||||
roq.io.enq.ready,
|
||||
io.nasti.ar.ready,
|
||||
get_id_ready)
|
||||
|
||||
val w_inflight = Reg(init = Bool(false))
|
||||
val w_id_reg = Reg(init = UInt(0, nastiXIdBits))
|
||||
val w_id = Mux(w_inflight, w_id_reg, put_id_mapper.io.req.out_id)
|
||||
|
||||
// For Put/PutBlock, make sure aw and w channel are both ready before
|
||||
// we send the first beat
|
||||
val aw_ready = w_inflight || io.nasti.aw.ready
|
||||
val put_helper = DecoupledHelper(
|
||||
put_valid,
|
||||
aw_ready,
|
||||
io.nasti.w.ready,
|
||||
put_id_ready)
|
||||
|
||||
val (nasti_cnt_out, nasti_wrap_out) = Counter(
|
||||
io.nasti.r.fire() && !roq.io.deq.data.subblock, tlDataBeats)
|
||||
|
||||
roq.io.enq.valid := get_helper.fire(roq.io.enq.ready)
|
||||
roq.io.enq.bits.tag := io.nasti.ar.bits.id
|
||||
roq.io.enq.bits.data.addr_beat := io.tl.acquire.bits.addr_beat
|
||||
roq.io.enq.bits.data.subblock := is_subblock
|
||||
roq.io.deq.valid := io.nasti.r.fire() && (nasti_wrap_out || roq.io.deq.data.subblock)
|
||||
roq.io.deq.tag := io.nasti.r.bits.id
|
||||
|
||||
get_id_mapper.io.req.valid := get_helper.fire(get_id_ready)
|
||||
get_id_mapper.io.req.in_id := io.tl.acquire.bits.client_xact_id
|
||||
get_id_mapper.io.resp.valid := io.nasti.r.fire() && io.nasti.r.bits.last
|
||||
get_id_mapper.io.resp.out_id := io.nasti.r.bits.id
|
||||
|
||||
put_id_mapper.io.req.valid := put_helper.fire(put_id_ready, put_id_mask)
|
||||
put_id_mapper.io.req.in_id := io.tl.acquire.bits.client_xact_id
|
||||
put_id_mapper.io.resp.valid := io.nasti.b.fire()
|
||||
put_id_mapper.io.resp.out_id := io.nasti.b.bits.id
|
||||
|
||||
// Decompose outgoing TL Acquires into Nasti address and data channels
|
||||
io.nasti.ar.valid := get_helper.fire(io.nasti.ar.ready)
|
||||
io.nasti.ar.bits := NastiReadAddressChannel(
|
||||
id = get_id_mapper.io.req.out_id,
|
||||
addr = io.tl.acquire.bits.full_addr(),
|
||||
size = Mux(is_subblock,
|
||||
io.tl.acquire.bits.op_size(),
|
||||
UInt(log2Ceil(tlDataBytes))),
|
||||
len = Mux(is_subblock, UInt(0), UInt(tlDataBeats - 1)))
|
||||
|
||||
def mask_helper(all_inside_0: Seq[Bool], defsize: Int): (Seq[Bool], UInt, UInt) = {
|
||||
val len = all_inside_0.size
|
||||
if (len == 1) {
|
||||
(Seq(Bool(true)), UInt(0), UInt(defsize))
|
||||
} else {
|
||||
val sub_inside_0 = Seq.tabulate (len/2) { i => all_inside_0(2*i) && all_inside_0(2*i+1) }
|
||||
val (sub_outside_0, sub_offset, sub_size) = mask_helper(sub_inside_0, defsize+1)
|
||||
val all_outside_0 = Seq.tabulate (len) { i => sub_outside_0(i/2) && all_inside_0(i^1) }
|
||||
val odd_outside_0 = Seq.tabulate (len/2) { i => all_outside_0(2*i+1) }
|
||||
val odd_outside = odd_outside_0.reduce (_ || _)
|
||||
val all_outside = all_outside_0.reduce (_ || _)
|
||||
val offset = Cat(sub_offset, odd_outside)
|
||||
val size = Mux(all_outside, UInt(defsize), sub_size)
|
||||
(all_outside_0, offset, size)
|
||||
}
|
||||
}
|
||||
|
||||
val all_inside_0 = (~io.tl.acquire.bits.wmask()).toBools
|
||||
val (_, put_offset, put_size) = mask_helper(all_inside_0, 0)
|
||||
|
||||
io.nasti.aw.valid := put_helper.fire(aw_ready, !w_inflight)
|
||||
io.nasti.aw.bits := NastiWriteAddressChannel(
|
||||
id = put_id_mapper.io.req.out_id,
|
||||
addr = io.tl.acquire.bits.full_addr() |
|
||||
Mux(is_multibeat, UInt(0), put_offset),
|
||||
size = Mux(is_multibeat, UInt(log2Ceil(tlDataBytes)), put_size),
|
||||
len = Mux(is_multibeat, UInt(tlDataBeats - 1), UInt(0)))
|
||||
|
||||
io.nasti.w.valid := put_helper.fire(io.nasti.w.ready)
|
||||
io.nasti.w.bits := NastiWriteDataChannel(
|
||||
id = w_id,
|
||||
data = io.tl.acquire.bits.data,
|
||||
strb = Some(io.tl.acquire.bits.wmask()),
|
||||
last = Mux(w_inflight,
|
||||
tl_cnt_out === UInt(tlDataBeats - 1), !is_multibeat))
|
||||
|
||||
io.tl.acquire.ready := Mux(has_data,
|
||||
put_helper.fire(put_valid),
|
||||
get_helper.fire(get_valid))
|
||||
|
||||
when (!w_inflight && io.tl.acquire.fire() && is_multibeat) {
|
||||
w_inflight := Bool(true)
|
||||
w_id_reg := w_id
|
||||
}
|
||||
|
||||
when (w_inflight) {
|
||||
when (tl_wrap_out) { w_inflight := Bool(false) }
|
||||
}
|
||||
|
||||
// Aggregate incoming NASTI responses into TL Grants
|
||||
val (tl_cnt_in, tl_wrap_in) = Counter(
|
||||
io.tl.grant.fire() && io.tl.grant.bits.hasMultibeatData(), tlDataBeats)
|
||||
val gnt_arb = Module(new LockingArbiter(new GrantToDst, 2,
|
||||
tlDataBeats, Some((gnt: GrantToDst) => gnt.hasMultibeatData())))
|
||||
io.tl.grant <> gnt_arb.io.out
|
||||
|
||||
gnt_arb.io.in(0).valid := io.nasti.r.valid
|
||||
io.nasti.r.ready := gnt_arb.io.in(0).ready
|
||||
gnt_arb.io.in(0).bits := Grant(
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = Mux(roq.io.deq.data.subblock,
|
||||
Grant.getDataBeatType, Grant.getDataBlockType),
|
||||
client_xact_id = get_id_mapper.io.resp.in_id,
|
||||
manager_xact_id = UInt(0),
|
||||
addr_beat = Mux(roq.io.deq.data.subblock, roq.io.deq.data.addr_beat, tl_cnt_in),
|
||||
data = io.nasti.r.bits.data)
|
||||
|
||||
assert(!roq.io.deq.valid || roq.io.deq.matches,
|
||||
"TL -> NASTI converter ReorderQueue: NASTI tag error")
|
||||
assert(!gnt_arb.io.in(0).valid || get_id_mapper.io.resp.matches,
|
||||
"TL -> NASTI ID Mapper: NASTI tag error")
|
||||
|
||||
gnt_arb.io.in(1).valid := io.nasti.b.valid
|
||||
io.nasti.b.ready := gnt_arb.io.in(1).ready
|
||||
gnt_arb.io.in(1).bits := Grant(
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = Grant.putAckType,
|
||||
client_xact_id = put_id_mapper.io.resp.in_id,
|
||||
manager_xact_id = UInt(0),
|
||||
addr_beat = UInt(0),
|
||||
data = Bits(0))
|
||||
assert(!gnt_arb.io.in(1).valid || put_id_mapper.io.resp.matches, "NASTI tag error")
|
||||
|
||||
assert(!io.nasti.r.valid || io.nasti.r.bits.resp === UInt(0), "NASTI read error")
|
||||
assert(!io.nasti.b.valid || io.nasti.b.bits.resp === UInt(0), "NASTI write error")
|
||||
}
|
||||
|
||||
class TileLinkIONastiIOConverter(implicit p: Parameters) extends TLModule()(p)
|
||||
with HasNastiParameters {
|
||||
val io = new Bundle {
|
||||
val nasti = (new NastiIO).flip
|
||||
val tl = new ClientUncachedTileLinkIO
|
||||
}
|
||||
|
||||
val (s_idle :: s_put :: Nil) = Enum(Bits(), 2)
|
||||
val state = Reg(init = s_idle)
|
||||
|
||||
private val blockOffset = tlByteAddrBits + tlBeatAddrBits
|
||||
|
||||
val aw_req = Reg(new NastiWriteAddressChannel)
|
||||
val w_tl_id = Reg(io.tl.acquire.bits.client_xact_id)
|
||||
|
||||
def is_singlebeat(chan: NastiAddressChannel): Bool =
|
||||
chan.len === UInt(0)
|
||||
|
||||
def is_multibeat(chan: NastiAddressChannel): Bool =
|
||||
chan.len === UInt(tlDataBeats - 1) && chan.size === UInt(log2Up(tlDataBytes))
|
||||
|
||||
def nasti_addr_block(chan: NastiAddressChannel): UInt =
|
||||
chan.addr(nastiXAddrBits - 1, blockOffset)
|
||||
|
||||
def nasti_addr_beat(chan: NastiAddressChannel): UInt =
|
||||
chan.addr(blockOffset - 1, tlByteAddrBits)
|
||||
|
||||
def nasti_addr_byte(chan: NastiAddressChannel): UInt =
|
||||
chan.addr(tlByteAddrBits - 1, 0)
|
||||
|
||||
def size_mask(size: UInt): UInt =
|
||||
(UInt(1) << (UInt(1) << size)) - UInt(1)
|
||||
|
||||
def nasti_wmask(aw: NastiWriteAddressChannel, w: NastiWriteDataChannel): UInt = {
|
||||
val base = w.strb & size_mask(aw.size)
|
||||
val addr_byte = nasti_addr_byte(aw)
|
||||
w.strb & (size_mask(aw.size) << addr_byte)
|
||||
}
|
||||
|
||||
def tl_last(gnt: GrantMetadata): Bool =
|
||||
!gnt.hasMultibeatData() || gnt.addr_beat === UInt(tlDataBeats - 1)
|
||||
|
||||
def tl_b_grant(gnt: GrantMetadata): Bool =
|
||||
gnt.g_type === Grant.putAckType
|
||||
|
||||
assert(!io.nasti.ar.valid ||
|
||||
is_singlebeat(io.nasti.ar.bits) || is_multibeat(io.nasti.ar.bits),
|
||||
"NASTI read transaction cannot convert to TileLInk")
|
||||
|
||||
assert(!io.nasti.aw.valid ||
|
||||
is_singlebeat(io.nasti.aw.bits) || is_multibeat(io.nasti.aw.bits),
|
||||
"NASTI write transaction cannot convert to TileLInk")
|
||||
|
||||
val put_count = Reg(init = UInt(0, tlBeatAddrBits))
|
||||
val get_id_mapper = Module(new IdMapper(nastiXIdBits, tlClientXactIdBits, true))
|
||||
val put_id_mapper = Module(new IdMapper(nastiXIdBits, tlClientXactIdBits, true))
|
||||
|
||||
when (io.nasti.aw.fire()) {
|
||||
aw_req := io.nasti.aw.bits
|
||||
w_tl_id := put_id_mapper.io.req.out_id
|
||||
state := s_put
|
||||
}
|
||||
|
||||
when (io.nasti.w.fire()) {
|
||||
put_count := put_count + UInt(1)
|
||||
when (io.nasti.w.bits.last) {
|
||||
put_count := UInt(0)
|
||||
state := s_idle
|
||||
}
|
||||
}
|
||||
|
||||
val get_acquire = Mux(is_multibeat(io.nasti.ar.bits),
|
||||
GetBlock(
|
||||
client_xact_id = get_id_mapper.io.req.out_id,
|
||||
addr_block = nasti_addr_block(io.nasti.ar.bits)),
|
||||
Get(
|
||||
client_xact_id = get_id_mapper.io.req.out_id,
|
||||
addr_block = nasti_addr_block(io.nasti.ar.bits),
|
||||
addr_beat = nasti_addr_beat(io.nasti.ar.bits),
|
||||
addr_byte = nasti_addr_byte(io.nasti.ar.bits),
|
||||
operand_size = io.nasti.ar.bits.size,
|
||||
alloc = Bool(false)))
|
||||
|
||||
val put_acquire = Mux(is_multibeat(aw_req),
|
||||
PutBlock(
|
||||
client_xact_id = w_tl_id,
|
||||
addr_block = nasti_addr_block(aw_req),
|
||||
addr_beat = put_count,
|
||||
data = io.nasti.w.bits.data,
|
||||
wmask = Some(io.nasti.w.bits.strb)),
|
||||
Put(
|
||||
client_xact_id = w_tl_id,
|
||||
addr_block = nasti_addr_block(aw_req),
|
||||
addr_beat = nasti_addr_beat(aw_req),
|
||||
data = io.nasti.w.bits.data,
|
||||
wmask = Some(nasti_wmask(aw_req, io.nasti.w.bits))))
|
||||
|
||||
val get_helper = DecoupledHelper(
|
||||
io.nasti.ar.valid,
|
||||
get_id_mapper.io.req.ready,
|
||||
io.tl.acquire.ready)
|
||||
|
||||
get_id_mapper.io.req.valid := get_helper.fire(
|
||||
get_id_mapper.io.req.ready, state === s_idle)
|
||||
get_id_mapper.io.req.in_id := io.nasti.ar.bits.id
|
||||
get_id_mapper.io.resp.out_id := io.tl.grant.bits.client_xact_id
|
||||
get_id_mapper.io.resp.valid := io.nasti.r.fire() && io.nasti.r.bits.last
|
||||
|
||||
val aw_ok = (state === s_idle && !io.nasti.ar.valid)
|
||||
|
||||
put_id_mapper.io.req.valid := aw_ok && io.nasti.aw.valid
|
||||
put_id_mapper.io.req.in_id := io.nasti.aw.bits.id
|
||||
put_id_mapper.io.resp.out_id := io.tl.grant.bits.client_xact_id
|
||||
put_id_mapper.io.resp.valid := io.nasti.b.fire()
|
||||
|
||||
io.tl.acquire.bits := Mux(state === s_put, put_acquire, get_acquire)
|
||||
io.tl.acquire.valid := get_helper.fire(io.tl.acquire.ready, state === s_idle) ||
|
||||
(state === s_put && io.nasti.w.valid)
|
||||
|
||||
io.nasti.ar.ready := get_helper.fire(io.nasti.ar.valid, state === s_idle)
|
||||
io.nasti.aw.ready := aw_ok && put_id_mapper.io.req.ready
|
||||
io.nasti.w.ready := (state === s_put && io.tl.acquire.ready)
|
||||
|
||||
val nXacts = tlMaxClientXacts * tlMaxClientsPerPort
|
||||
|
||||
io.nasti.b.valid := io.tl.grant.valid && tl_b_grant(io.tl.grant.bits)
|
||||
io.nasti.b.bits := NastiWriteResponseChannel(
|
||||
id = put_id_mapper.io.resp.in_id)
|
||||
|
||||
assert(!io.nasti.b.valid || put_id_mapper.io.resp.matches,
|
||||
"Put ID does not match")
|
||||
|
||||
io.nasti.r.valid := io.tl.grant.valid && !tl_b_grant(io.tl.grant.bits)
|
||||
io.nasti.r.bits := NastiReadDataChannel(
|
||||
id = get_id_mapper.io.resp.in_id,
|
||||
data = io.tl.grant.bits.data,
|
||||
last = tl_last(io.tl.grant.bits))
|
||||
|
||||
assert(!io.nasti.r.valid || get_id_mapper.io.resp.matches,
|
||||
"Get ID does not match")
|
||||
|
||||
io.tl.grant.ready := Mux(tl_b_grant(io.tl.grant.bits),
|
||||
io.nasti.b.ready, io.nasti.r.ready)
|
||||
}
|
||||
32
src/main/scala/uncore/converters/Smi.scala
Normal file
32
src/main/scala/uncore/converters/Smi.scala
Normal file
@@ -0,0 +1,32 @@
|
||||
// See LICENSE for details
|
||||
|
||||
package uncore.converters
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import cde.Parameters
|
||||
|
||||
/** Convert TileLink protocol to Smi protocol */
|
||||
class SmiIOTileLinkIOConverter(val dataWidth: Int, val addrWidth: Int)
|
||||
(implicit p: Parameters) extends Module {
|
||||
val io = new Bundle {
|
||||
val tl = (new ClientUncachedTileLinkIO).flip
|
||||
val smi = new SmiIO(dataWidth, addrWidth)
|
||||
}
|
||||
|
||||
def decoupledNastiConnect(outer: NastiIO, inner: NastiIO) {
|
||||
outer.ar <> Queue(inner.ar)
|
||||
outer.aw <> Queue(inner.aw)
|
||||
outer.w <> Queue(inner.w)
|
||||
inner.r <> Queue(outer.r)
|
||||
inner.b <> Queue(outer.b)
|
||||
}
|
||||
|
||||
val tl2nasti = Module(new NastiIOTileLinkIOConverter())
|
||||
val nasti2smi = Module(new SmiIONastiIOConverter(dataWidth, addrWidth))
|
||||
|
||||
tl2nasti.io.tl <> io.tl
|
||||
decoupledNastiConnect(nasti2smi.io.nasti, tl2nasti.io.nasti)
|
||||
io.smi <> nasti2smi.io.smi
|
||||
}
|
||||
681
src/main/scala/uncore/converters/Tilelink.scala
Normal file
681
src/main/scala/uncore/converters/Tilelink.scala
Normal file
@@ -0,0 +1,681 @@
|
||||
package uncore.converters
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import uncore.util._
|
||||
import uncore.constants._
|
||||
import cde.Parameters
|
||||
|
||||
/** Utilities for safely wrapping a *UncachedTileLink by pinning probe.ready and release.valid low */
|
||||
object TileLinkIOWrapper {
|
||||
def apply(tl: ClientUncachedTileLinkIO)(implicit p: Parameters): ClientTileLinkIO = {
|
||||
val conv = Module(new ClientTileLinkIOWrapper)
|
||||
conv.io.in <> tl
|
||||
conv.io.out
|
||||
}
|
||||
def apply(tl: UncachedTileLinkIO)(implicit p: Parameters): TileLinkIO = {
|
||||
val conv = Module(new TileLinkIOWrapper)
|
||||
conv.io.in <> tl
|
||||
conv.io.out
|
||||
}
|
||||
def apply(tl: ClientTileLinkIO): ClientTileLinkIO = tl
|
||||
def apply(tl: TileLinkIO): TileLinkIO = tl
|
||||
}
|
||||
|
||||
class TileLinkIOWrapper(implicit p: Parameters) extends TLModule()(p) {
|
||||
val io = new Bundle {
|
||||
val in = new UncachedTileLinkIO().flip
|
||||
val out = new TileLinkIO
|
||||
}
|
||||
io.out.acquire <> io.in.acquire
|
||||
io.in.grant <> io.out.grant
|
||||
io.out.finish <> io.in.finish
|
||||
io.out.probe.ready := Bool(true)
|
||||
io.out.release.valid := Bool(false)
|
||||
}
|
||||
|
||||
class ClientTileLinkIOWrapper(implicit p: Parameters) extends TLModule()(p) {
|
||||
val io = new Bundle {
|
||||
val in = new ClientUncachedTileLinkIO().flip
|
||||
val out = new ClientTileLinkIO
|
||||
}
|
||||
io.out.acquire <> io.in.acquire
|
||||
io.in.grant <> io.out.grant
|
||||
io.out.probe.ready := Bool(true)
|
||||
io.out.release.valid := Bool(false)
|
||||
}
|
||||
|
||||
class ClientTileLinkIOUnwrapper(implicit p: Parameters) extends TLModule()(p) {
|
||||
val io = new Bundle {
|
||||
val in = new ClientTileLinkIO().flip
|
||||
val out = new ClientUncachedTileLinkIO
|
||||
}
|
||||
|
||||
val acqArb = Module(new LockingRRArbiter(new Acquire, 2, tlDataBeats,
|
||||
Some((acq: Acquire) => acq.hasMultibeatData())))
|
||||
|
||||
val acqRoq = Module(new ReorderQueue(Bool(), tlClientXactIdBits))
|
||||
val relRoq = Module(new ReorderQueue(Bool(), tlClientXactIdBits))
|
||||
|
||||
val iacq = io.in.acquire.bits
|
||||
val irel = io.in.release.bits
|
||||
val ognt = io.out.grant.bits
|
||||
|
||||
val acq_roq_enq = iacq.first()
|
||||
val rel_roq_enq = irel.first()
|
||||
|
||||
val acq_roq_ready = !acq_roq_enq || acqRoq.io.enq.ready
|
||||
val rel_roq_ready = !rel_roq_enq || relRoq.io.enq.ready
|
||||
|
||||
val acq_helper = DecoupledHelper(
|
||||
io.in.acquire.valid,
|
||||
acq_roq_ready,
|
||||
acqArb.io.in(0).ready)
|
||||
|
||||
val rel_helper = DecoupledHelper(
|
||||
io.in.release.valid,
|
||||
rel_roq_ready,
|
||||
acqArb.io.in(1).ready)
|
||||
|
||||
acqRoq.io.enq.valid := acq_helper.fire(acq_roq_ready, acq_roq_enq)
|
||||
acqRoq.io.enq.bits.data := iacq.isBuiltInType()
|
||||
acqRoq.io.enq.bits.tag := iacq.client_xact_id
|
||||
|
||||
acqArb.io.in(0).valid := acq_helper.fire(acqArb.io.in(0).ready)
|
||||
acqArb.io.in(0).bits := Acquire(
|
||||
is_builtin_type = Bool(true),
|
||||
a_type = Mux(iacq.isBuiltInType(),
|
||||
iacq.a_type, Acquire.getBlockType),
|
||||
client_xact_id = iacq.client_xact_id,
|
||||
addr_block = iacq.addr_block,
|
||||
addr_beat = iacq.addr_beat,
|
||||
data = iacq.data,
|
||||
union = iacq.union)
|
||||
io.in.acquire.ready := acq_helper.fire(io.in.acquire.valid)
|
||||
|
||||
relRoq.io.enq.valid := rel_helper.fire(rel_roq_ready, rel_roq_enq)
|
||||
relRoq.io.enq.bits.data := irel.isVoluntary()
|
||||
relRoq.io.enq.bits.tag := irel.client_xact_id
|
||||
|
||||
acqArb.io.in(1).valid := rel_helper.fire(acqArb.io.in(1).ready)
|
||||
acqArb.io.in(1).bits := PutBlock(
|
||||
client_xact_id = irel.client_xact_id,
|
||||
addr_block = irel.addr_block,
|
||||
addr_beat = irel.addr_beat,
|
||||
data = irel.data)
|
||||
io.in.release.ready := rel_helper.fire(io.in.release.valid)
|
||||
|
||||
io.out.acquire <> acqArb.io.out
|
||||
|
||||
val grant_deq_roq = io.out.grant.fire() && ognt.last()
|
||||
|
||||
acqRoq.io.deq.valid := acqRoq.io.deq.matches && grant_deq_roq
|
||||
acqRoq.io.deq.tag := ognt.client_xact_id
|
||||
|
||||
relRoq.io.deq.valid := !acqRoq.io.deq.matches && grant_deq_roq
|
||||
relRoq.io.deq.tag := ognt.client_xact_id
|
||||
|
||||
assert(!grant_deq_roq || acqRoq.io.deq.matches || relRoq.io.deq.matches,
|
||||
"TileLink Unwrapper: client_xact_id mismatch")
|
||||
|
||||
val gnt_builtin = acqRoq.io.deq.data
|
||||
val gnt_voluntary = relRoq.io.deq.data
|
||||
|
||||
val acq_grant = Grant(
|
||||
is_builtin_type = gnt_builtin,
|
||||
g_type = Mux(gnt_builtin, ognt.g_type, tlCoh.getExclusiveGrantType),
|
||||
client_xact_id = ognt.client_xact_id,
|
||||
manager_xact_id = ognt.manager_xact_id,
|
||||
addr_beat = ognt.addr_beat,
|
||||
data = ognt.data)
|
||||
|
||||
assert(!io.in.release.valid || io.in.release.bits.isVoluntary(), "Unwrapper can only process voluntary releases.")
|
||||
val rel_grant = Grant(
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = Grant.voluntaryAckType, // We should only every be working with voluntary releases
|
||||
client_xact_id = ognt.client_xact_id,
|
||||
manager_xact_id = ognt.manager_xact_id,
|
||||
addr_beat = ognt.addr_beat,
|
||||
data = ognt.data)
|
||||
|
||||
io.in.grant.valid := io.out.grant.valid
|
||||
io.in.grant.bits := Mux(acqRoq.io.deq.matches, acq_grant, rel_grant)
|
||||
io.out.grant.ready := io.in.grant.ready
|
||||
|
||||
io.in.probe.valid := Bool(false)
|
||||
}
|
||||
|
||||
object TileLinkWidthAdapter {
|
||||
def apply(in: ClientUncachedTileLinkIO, outerId: String)(implicit p: Parameters) = {
|
||||
val outerDataBits = p(TLKey(outerId)).dataBitsPerBeat
|
||||
if (outerDataBits > in.tlDataBits) {
|
||||
val widener = Module(new TileLinkIOWidener(in.p(TLId), outerId))
|
||||
widener.io.in <> in
|
||||
widener.io.out
|
||||
} else if (outerDataBits < in.tlDataBits) {
|
||||
val narrower = Module(new TileLinkIONarrower(in.p(TLId), outerId))
|
||||
narrower.io.in <> in
|
||||
narrower.io.out
|
||||
} else { in }
|
||||
}
|
||||
def apply(out: ClientUncachedTileLinkIO, in: ClientUncachedTileLinkIO)(implicit p: Parameters): Unit = {
|
||||
require(out.tlDataBits * out.tlDataBeats == in.tlDataBits * in.tlDataBeats)
|
||||
out <> apply(in, out.p(TLId))
|
||||
}
|
||||
}
|
||||
|
||||
class TileLinkIOWidener(innerTLId: String, outerTLId: String)
|
||||
(implicit p: Parameters) extends TLModule()(p) {
|
||||
|
||||
val paddrBits = p(PAddrBits)
|
||||
val innerParams = p(TLKey(innerTLId))
|
||||
val outerParams = p(TLKey(outerTLId))
|
||||
val innerDataBeats = innerParams.dataBeats
|
||||
val innerDataBits = innerParams.dataBitsPerBeat
|
||||
val innerWriteMaskBits = innerParams.writeMaskBits
|
||||
val innerByteAddrBits = log2Up(innerWriteMaskBits)
|
||||
val innerMaxXacts = innerParams.maxClientXacts * innerParams.maxClientsPerPort
|
||||
val innerXactIdBits = log2Up(innerMaxXacts)
|
||||
val outerDataBeats = outerParams.dataBeats
|
||||
val outerDataBits = outerParams.dataBitsPerBeat
|
||||
val outerWriteMaskBits = outerParams.writeMaskBits
|
||||
val outerByteAddrBits = log2Up(outerWriteMaskBits)
|
||||
val outerBeatAddrBits = log2Up(outerDataBeats)
|
||||
val outerBlockOffset = outerBeatAddrBits + outerByteAddrBits
|
||||
val outerMaxClients = outerParams.maxClientsPerPort
|
||||
val outerClientIdBits = log2Up(outerParams.maxClientXacts * outerMaxClients)
|
||||
val outerManagerIdBits = log2Up(outerParams.maxManagerXacts)
|
||||
val outerBlockAddrBits = paddrBits - outerBlockOffset
|
||||
|
||||
require(outerDataBeats <= innerDataBeats)
|
||||
require(outerDataBits >= innerDataBits)
|
||||
require(outerDataBits % innerDataBits == 0)
|
||||
require(outerDataBits * outerDataBeats == innerDataBits * innerDataBeats)
|
||||
|
||||
val factor = innerDataBeats / outerDataBeats
|
||||
|
||||
val io = new Bundle {
|
||||
val in = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => innerTLId})).flip
|
||||
val out = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => outerTLId}))
|
||||
}
|
||||
|
||||
val iacq = io.in.acquire.bits
|
||||
val oacq = io.out.acquire.bits
|
||||
val ognt = io.out.grant.bits
|
||||
val ignt = io.in.grant.bits
|
||||
|
||||
val shrink = iacq.a_type === Acquire.putBlockType
|
||||
val stretch = ognt.g_type === Grant.getDataBlockType
|
||||
val smallget = iacq.a_type === Acquire.getType
|
||||
val smallput = iacq.a_type === Acquire.putType
|
||||
val smallgnt = ognt.g_type === Grant.getDataBeatType
|
||||
|
||||
val sending_put = Reg(init = Bool(false))
|
||||
val collecting = Reg(init = Bool(false))
|
||||
val put_block = Reg(UInt(width = outerBlockAddrBits))
|
||||
val put_id = Reg(UInt(width = outerClientIdBits))
|
||||
val put_data = Reg(Vec(factor, UInt(width = innerDataBits)))
|
||||
val put_wmask = Reg(Vec(factor, UInt(width = innerWriteMaskBits)))
|
||||
val put_allocate = Reg(Bool())
|
||||
val (put_beat, put_done) = Counter(io.out.acquire.fire() && oacq.hasMultibeatData(), outerDataBeats)
|
||||
val (recv_idx, recv_done) = Counter(io.in.acquire.fire() && iacq.hasMultibeatData(), factor)
|
||||
|
||||
val in_addr = iacq.full_addr()
|
||||
val out_addr_block = in_addr(paddrBits - 1, outerBlockOffset)
|
||||
val out_addr_beat = in_addr(outerBlockOffset - 1, outerByteAddrBits)
|
||||
val out_addr_byte = in_addr(outerByteAddrBits - 1, 0)
|
||||
|
||||
val switch_addr = in_addr(outerByteAddrBits - 1, innerByteAddrBits)
|
||||
val smallget_switch = Reg(Vec(innerMaxXacts, switch_addr))
|
||||
|
||||
def align_data(addr: UInt, data: UInt): UInt =
|
||||
data << Cat(addr, UInt(0, log2Up(innerDataBits)))
|
||||
|
||||
def align_wmask(addr: UInt, wmask: UInt): UInt =
|
||||
wmask << Cat(addr, UInt(0, log2Up(innerWriteMaskBits)))
|
||||
|
||||
val outerConfig = p.alterPartial({ case TLId => outerTLId })
|
||||
|
||||
val get_acquire = Get(
|
||||
client_xact_id = iacq.client_xact_id,
|
||||
addr_block = out_addr_block,
|
||||
addr_beat = out_addr_beat,
|
||||
addr_byte = out_addr_byte,
|
||||
operand_size = iacq.op_size(),
|
||||
alloc = iacq.allocate())(outerConfig)
|
||||
|
||||
val get_block_acquire = GetBlock(
|
||||
client_xact_id = iacq.client_xact_id,
|
||||
addr_block = out_addr_block,
|
||||
alloc = iacq.allocate())(outerConfig)
|
||||
|
||||
val put_acquire = Put(
|
||||
client_xact_id = iacq.client_xact_id,
|
||||
addr_block = out_addr_block,
|
||||
addr_beat = out_addr_beat,
|
||||
data = align_data(switch_addr, iacq.data),
|
||||
wmask = Some(align_wmask(switch_addr, iacq.wmask())),
|
||||
alloc = iacq.allocate())(outerConfig)
|
||||
|
||||
val put_block_acquire = PutBlock(
|
||||
client_xact_id = put_id,
|
||||
addr_block = put_block,
|
||||
addr_beat = put_beat,
|
||||
data = put_data.asUInt,
|
||||
wmask = Some(put_wmask.asUInt))(outerConfig)
|
||||
|
||||
io.out.acquire.valid := sending_put || (!shrink && io.in.acquire.valid)
|
||||
io.out.acquire.bits := MuxCase(get_block_acquire, Seq(
|
||||
sending_put -> put_block_acquire,
|
||||
smallget -> get_acquire,
|
||||
smallput -> put_acquire))
|
||||
io.in.acquire.ready := !sending_put && (shrink || io.out.acquire.ready)
|
||||
|
||||
when (io.in.acquire.fire() && shrink) {
|
||||
when (!collecting) {
|
||||
put_block := out_addr_block
|
||||
put_id := iacq.client_xact_id
|
||||
put_allocate := iacq.allocate()
|
||||
collecting := Bool(true)
|
||||
}
|
||||
put_data(recv_idx) := iacq.data
|
||||
put_wmask(recv_idx) := iacq.wmask()
|
||||
}
|
||||
|
||||
when (io.in.acquire.fire() && smallget) {
|
||||
smallget_switch(iacq.client_xact_id) := switch_addr
|
||||
}
|
||||
|
||||
when (recv_done) { sending_put := Bool(true) }
|
||||
when (sending_put && io.out.acquire.ready) { sending_put := Bool(false) }
|
||||
when (put_done) { collecting := Bool(false) }
|
||||
|
||||
val returning_data = Reg(init = Bool(false))
|
||||
val (send_idx, send_done) = Counter(
|
||||
io.in.grant.ready && returning_data, factor)
|
||||
|
||||
val gnt_beat = Reg(UInt(width = outerBeatAddrBits))
|
||||
val gnt_client_id = Reg(UInt(width = outerClientIdBits))
|
||||
val gnt_manager_id = Reg(UInt(width = outerManagerIdBits))
|
||||
val gnt_data = Reg(UInt(width = outerDataBits))
|
||||
|
||||
when (io.out.grant.fire() && stretch) {
|
||||
gnt_data := ognt.data
|
||||
gnt_client_id := ognt.client_xact_id
|
||||
gnt_manager_id := ognt.manager_xact_id
|
||||
gnt_beat := ognt.addr_beat
|
||||
returning_data := Bool(true)
|
||||
}
|
||||
|
||||
when (send_done) { returning_data := Bool(false) }
|
||||
|
||||
def select_data(data: UInt, sel: UInt): UInt =
|
||||
data >> (sel << log2Up(innerDataBits))
|
||||
|
||||
val gnt_switch = smallget_switch(ognt.client_xact_id)
|
||||
|
||||
val innerConfig = p.alterPartial({ case TLId => innerTLId })
|
||||
|
||||
val get_block_grant = Grant(
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = Grant.getDataBlockType,
|
||||
client_xact_id = gnt_client_id,
|
||||
manager_xact_id = gnt_manager_id,
|
||||
addr_beat = Cat(gnt_beat, send_idx),
|
||||
data = select_data(gnt_data, send_idx))(innerConfig)
|
||||
|
||||
val get_grant = Grant(
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = Grant.getDataBeatType,
|
||||
client_xact_id = ognt.client_xact_id,
|
||||
manager_xact_id = ognt.manager_xact_id,
|
||||
addr_beat = Cat(ognt.addr_beat, gnt_switch),
|
||||
data = select_data(ognt.data, gnt_switch))(innerConfig)
|
||||
|
||||
val default_grant = Grant(
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = ognt.g_type,
|
||||
client_xact_id = ognt.client_xact_id,
|
||||
manager_xact_id = ognt.manager_xact_id,
|
||||
addr_beat = ognt.addr_beat,
|
||||
data = ognt.data)(innerConfig)
|
||||
|
||||
io.in.grant.valid := returning_data || (!stretch && io.out.grant.valid)
|
||||
io.in.grant.bits := MuxCase(default_grant, Seq(
|
||||
returning_data -> get_block_grant,
|
||||
smallgnt -> get_grant))
|
||||
io.out.grant.ready := !returning_data && (stretch || io.in.grant.ready)
|
||||
}
|
||||
|
||||
class TileLinkIONarrower(innerTLId: String, outerTLId: String)
|
||||
(implicit p: Parameters) extends TLModule()(p) {
|
||||
|
||||
val innerParams = p(TLKey(innerTLId))
|
||||
val outerParams = p(TLKey(outerTLId))
|
||||
val innerDataBeats = innerParams.dataBeats
|
||||
val innerDataBits = innerParams.dataBitsPerBeat
|
||||
val innerWriteMaskBits = innerParams.writeMaskBits
|
||||
val innerByteAddrBits = log2Up(innerWriteMaskBits)
|
||||
val outerDataBeats = outerParams.dataBeats
|
||||
val outerDataBits = outerParams.dataBitsPerBeat
|
||||
val outerWriteMaskBits = outerParams.writeMaskBits
|
||||
val outerByteAddrBits = log2Up(outerWriteMaskBits)
|
||||
val outerBeatAddrBits = log2Up(outerDataBeats)
|
||||
val outerBlockOffset = outerBeatAddrBits + outerByteAddrBits
|
||||
val outerMaxClients = outerParams.maxClientsPerPort
|
||||
val outerIdBits = log2Up(outerParams.maxClientXacts * outerMaxClients)
|
||||
|
||||
require(outerDataBeats > innerDataBeats)
|
||||
require(outerDataBeats % innerDataBeats == 0)
|
||||
require(outerDataBits < innerDataBits)
|
||||
require(outerDataBits * outerDataBeats == innerDataBits * innerDataBeats)
|
||||
|
||||
val factor = outerDataBeats / innerDataBeats
|
||||
|
||||
val io = new Bundle {
|
||||
val in = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => innerTLId})).flip
|
||||
val out = new ClientUncachedTileLinkIO()(p.alterPartial({case TLId => outerTLId}))
|
||||
}
|
||||
|
||||
val iacq = io.in.acquire.bits
|
||||
val ognt = io.out.grant.bits
|
||||
|
||||
val stretch = iacq.a_type === Acquire.putBlockType
|
||||
val shrink = iacq.a_type === Acquire.getBlockType
|
||||
val smallput = iacq.a_type === Acquire.putType
|
||||
val smallget = iacq.a_type === Acquire.getType
|
||||
|
||||
val acq_data_buffer = Reg(UInt(width = innerDataBits))
|
||||
val acq_wmask_buffer = Reg(UInt(width = innerWriteMaskBits))
|
||||
val acq_client_id = Reg(iacq.client_xact_id)
|
||||
val acq_addr_block = Reg(iacq.addr_block)
|
||||
val acq_addr_beat = Reg(iacq.addr_beat)
|
||||
val oacq_ctr = Counter(factor)
|
||||
|
||||
val outer_beat_addr = iacq.full_addr()(outerBlockOffset - 1, outerByteAddrBits)
|
||||
val outer_byte_addr = iacq.full_addr()(outerByteAddrBits - 1, 0)
|
||||
|
||||
val mask_chunks = Vec.tabulate(factor) { i =>
|
||||
val lsb = i * outerWriteMaskBits
|
||||
val msb = (i + 1) * outerWriteMaskBits - 1
|
||||
iacq.wmask()(msb, lsb)
|
||||
}
|
||||
|
||||
val data_chunks = Vec.tabulate(factor) { i =>
|
||||
val lsb = i * outerDataBits
|
||||
val msb = (i + 1) * outerDataBits - 1
|
||||
iacq.data(msb, lsb)
|
||||
}
|
||||
|
||||
val beat_sel = Cat(mask_chunks.map(mask => mask.orR).reverse)
|
||||
|
||||
val smallput_data = Mux1H(beat_sel, data_chunks)
|
||||
val smallput_wmask = Mux1H(beat_sel, mask_chunks)
|
||||
val smallput_beat = Cat(iacq.addr_beat, PriorityEncoder(beat_sel))
|
||||
|
||||
assert(!io.in.acquire.valid || !smallput || PopCount(beat_sel) <= UInt(1),
|
||||
"Can't perform Put wider than outer width")
|
||||
|
||||
val read_size_ok = iacq.op_size() <= UInt(log2Ceil(outerDataBits / 8))
|
||||
assert(!io.in.acquire.valid || !smallget || read_size_ok,
|
||||
"Can't perform Get wider than outer width")
|
||||
|
||||
val outerConfig = p.alterPartial({ case TLId => outerTLId })
|
||||
val innerConfig = p.alterPartial({ case TLId => innerTLId })
|
||||
|
||||
val get_block_acquire = GetBlock(
|
||||
client_xact_id = iacq.client_xact_id,
|
||||
addr_block = iacq.addr_block,
|
||||
alloc = iacq.allocate())(outerConfig)
|
||||
|
||||
val put_block_acquire = PutBlock(
|
||||
client_xact_id = acq_client_id,
|
||||
addr_block = acq_addr_block,
|
||||
addr_beat = if (factor > 1)
|
||||
Cat(acq_addr_beat, oacq_ctr.value)
|
||||
else acq_addr_beat,
|
||||
data = acq_data_buffer(outerDataBits - 1, 0),
|
||||
wmask = Some(acq_wmask_buffer(outerWriteMaskBits - 1, 0)))(outerConfig)
|
||||
|
||||
val get_acquire = Get(
|
||||
client_xact_id = iacq.client_xact_id,
|
||||
addr_block = iacq.addr_block,
|
||||
addr_beat = outer_beat_addr,
|
||||
addr_byte = outer_byte_addr,
|
||||
operand_size = iacq.op_size(),
|
||||
alloc = iacq.allocate())(outerConfig)
|
||||
|
||||
val put_acquire = Put(
|
||||
client_xact_id = iacq.client_xact_id,
|
||||
addr_block = iacq.addr_block,
|
||||
addr_beat = smallput_beat,
|
||||
data = smallput_data,
|
||||
wmask = Some(smallput_wmask))(outerConfig)
|
||||
|
||||
val sending_put = Reg(init = Bool(false))
|
||||
|
||||
val pass_valid = io.in.acquire.valid && !stretch
|
||||
|
||||
io.out.acquire.bits := MuxCase(Wire(io.out.acquire.bits, init=iacq), Seq(
|
||||
(sending_put, put_block_acquire),
|
||||
(shrink, get_block_acquire),
|
||||
(smallput, put_acquire),
|
||||
(smallget, get_acquire)))
|
||||
io.out.acquire.valid := sending_put || pass_valid
|
||||
io.in.acquire.ready := !sending_put && (stretch || io.out.acquire.ready)
|
||||
|
||||
when (io.in.acquire.fire() && stretch) {
|
||||
acq_data_buffer := iacq.data
|
||||
acq_wmask_buffer := iacq.wmask()
|
||||
acq_client_id := iacq.client_xact_id
|
||||
acq_addr_block := iacq.addr_block
|
||||
acq_addr_beat := iacq.addr_beat
|
||||
sending_put := Bool(true)
|
||||
}
|
||||
|
||||
when (sending_put && io.out.acquire.ready) {
|
||||
acq_data_buffer := acq_data_buffer >> outerDataBits
|
||||
acq_wmask_buffer := acq_wmask_buffer >> outerWriteMaskBits
|
||||
when (oacq_ctr.inc()) { sending_put := Bool(false) }
|
||||
}
|
||||
|
||||
val ognt_block = ognt.hasMultibeatData()
|
||||
val gnt_data_buffer = Reg(Vec(factor, UInt(width = outerDataBits)))
|
||||
val gnt_client_id = Reg(ognt.client_xact_id)
|
||||
val gnt_manager_id = Reg(ognt.manager_xact_id)
|
||||
|
||||
val ignt_ctr = Counter(innerDataBeats)
|
||||
val ognt_ctr = Counter(factor)
|
||||
val sending_get = Reg(init = Bool(false))
|
||||
|
||||
val get_block_grant = Grant(
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = Grant.getDataBlockType,
|
||||
client_xact_id = gnt_client_id,
|
||||
manager_xact_id = gnt_manager_id,
|
||||
addr_beat = ignt_ctr.value,
|
||||
data = gnt_data_buffer.asUInt)(innerConfig)
|
||||
|
||||
val smallget_grant = ognt.g_type === Grant.getDataBeatType
|
||||
|
||||
val get_grant = Grant(
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = Grant.getDataBeatType,
|
||||
client_xact_id = ognt.client_xact_id,
|
||||
manager_xact_id = ognt.manager_xact_id,
|
||||
addr_beat = ognt.addr_beat >> UInt(log2Up(factor)),
|
||||
data = Fill(factor, ognt.data))(innerConfig)
|
||||
|
||||
io.in.grant.valid := sending_get || (io.out.grant.valid && !ognt_block)
|
||||
io.out.grant.ready := !sending_get && (ognt_block || io.in.grant.ready)
|
||||
|
||||
io.in.grant.bits := MuxCase(Wire(io.in.grant.bits, init=ognt), Seq(
|
||||
sending_get -> get_block_grant,
|
||||
smallget_grant -> get_grant))
|
||||
|
||||
when (io.out.grant.valid && ognt_block && !sending_get) {
|
||||
gnt_data_buffer(ognt_ctr.value) := ognt.data
|
||||
when (ognt_ctr.inc()) {
|
||||
gnt_client_id := ognt.client_xact_id
|
||||
gnt_manager_id := ognt.manager_xact_id
|
||||
sending_get := Bool(true)
|
||||
}
|
||||
}
|
||||
|
||||
when (io.in.grant.ready && sending_get) {
|
||||
ignt_ctr.inc()
|
||||
sending_get := Bool(false)
|
||||
}
|
||||
}
|
||||
|
||||
class TileLinkFragmenterSource(implicit p: Parameters) extends TLModule()(p) {
|
||||
val io = new Bundle {
|
||||
val in = Decoupled(new Acquire).flip
|
||||
val out = Decoupled(new Acquire)
|
||||
val que = Decoupled(UInt(width = tlBeatAddrBits))
|
||||
}
|
||||
|
||||
// Pipeline stage with acquire data; needed to ensure in.bits stay fixed when !in.ready
|
||||
val acq_valid = RegInit(Bool(false))
|
||||
val acq_bits = Reg(new Acquire)
|
||||
// The last beat of generate acquire to send
|
||||
val acq_last_beat = Reg(UInt(width = tlBeatAddrBits))
|
||||
val acq_last = acq_bits.addr_beat === acq_last_beat
|
||||
|
||||
// 'in' has the first beat?
|
||||
val in_multi_put = io.in.bits.isBuiltInType(Acquire.putBlockType)
|
||||
val in_multi_get = io.in.bits.isBuiltInType(Acquire.getBlockType)
|
||||
val in_first_beat = !in_multi_put || io.in.bits.addr_beat === UInt(0)
|
||||
|
||||
// Move stuff from acq to out whenever out is ready
|
||||
io.out.valid := acq_valid
|
||||
// When can acq accept a request?
|
||||
val acq_ready = !acq_valid || (acq_last && io.out.ready)
|
||||
// Move the first beat from in to acq only when both acq and que are ready
|
||||
io.in.ready := (!in_first_beat || io.que.ready) && acq_ready
|
||||
io.que.valid := (in_first_beat && io.in.valid) && acq_ready
|
||||
|
||||
// in.fire moves data from in to acq and (optionally) que
|
||||
// out.fire moves data from acq to out
|
||||
|
||||
// Desired flow control results:
|
||||
assert (!io.que.fire() || io.in.fire()) // 1. que.fire => in.fire
|
||||
assert (!(io.in.fire() && in_first_beat) || io.que.fire()) // 2. in.fire && in_first_beat => que.fire
|
||||
assert (!io.out.fire() || acq_valid) // 3. out.fire => acq_valid
|
||||
assert (!io.in.fire() || (!acq_valid || (io.out.fire() && acq_last))) // 4. in.fire => !acq_valid || (out.fire && acq_last)
|
||||
// Proofs:
|
||||
// 1. que.fire => que.ready && in.valid && acq_ready => in.ready && in.valid
|
||||
// 2. in.fire && in_first_beat => in.valid && acq_ready && [(!in_first_beat || que.ready) && in_first_beat] =>
|
||||
// in.valid && acq_ready && que.ready && in_first_beat => que.valid && que.ready
|
||||
// 3. out.fire => out.valid => acq_valid
|
||||
// 4. in.fire => acq_ready => !acq_valid || (acq_last && out.ready) =>
|
||||
// !acq_valid || (acq_valid && acq_last && out.ready) => !acq_valid || (acq_last && out.fire)
|
||||
|
||||
val multi_size = SInt(-1, width = tlBeatAddrBits).asUInt // TL2: use in.bits.size()/beatBits-1
|
||||
val in_sizeMinus1 = Mux(in_multi_get || in_multi_put, multi_size, UInt(0))
|
||||
val in_insertSizeMinus1 = Mux(in_multi_get, multi_size, UInt(0))
|
||||
|
||||
when (io.in.fire()) {
|
||||
// Theorem 4 makes this safe; we overwrite garbage, or replace the final acq
|
||||
acq_valid := Bool(true)
|
||||
acq_bits := io.in.bits
|
||||
acq_last_beat := io.in.bits.addr_beat + in_insertSizeMinus1
|
||||
// Replace this with size truncation in TL2:
|
||||
acq_bits.a_type := Mux(in_multi_put, Acquire.putType, Mux(in_multi_get, Acquire.getType, io.in.bits.a_type))
|
||||
} .elsewhen (io.out.fire()) {
|
||||
acq_valid := !acq_last // false => !in.valid || (!que.ready && in_first_beat)
|
||||
acq_bits.addr_beat := acq_bits.addr_beat + UInt(1)
|
||||
// acq_last && out.fire => acq_last && out.ready && acq_valid => acq_ready
|
||||
// Suppose in.valid, then !in.fire => !in.ready => !(!in_first_beat || que.ready) => !que.ready && in_first_beat
|
||||
}
|
||||
|
||||
// Safe by theorem 3
|
||||
io.out.bits := acq_bits
|
||||
// Safe by theorem 1
|
||||
io.que.bits := in_sizeMinus1
|
||||
}
|
||||
|
||||
class TileLinkFragmenterSink(implicit p: Parameters) extends TLModule()(p) {
|
||||
val io = new Bundle {
|
||||
val in = Decoupled(new Grant).flip
|
||||
val out = Decoupled(new Grant)
|
||||
val que = Decoupled(UInt(width = tlBeatAddrBits)).flip
|
||||
}
|
||||
|
||||
val count_valid = RegInit(Bool(false))
|
||||
val multi_op = Reg(Bool())
|
||||
val count_bits = Reg(UInt(width = tlBeatAddrBits))
|
||||
val last = count_bits === UInt(0)
|
||||
|
||||
val in_put = io.in.bits.isBuiltInType(Grant.putAckType)
|
||||
val in_get = io.in.bits.isBuiltInType(Grant.getDataBeatType)
|
||||
val deliver = last || in_get
|
||||
|
||||
// Accept the input, discarding the non-final put grant
|
||||
io.in.ready := count_valid && (io.out.ready || !deliver)
|
||||
// Output the grant whenever we want delivery
|
||||
io.out.valid := count_valid && io.in.valid && deliver
|
||||
// Take a new number whenever we deliver the last beat
|
||||
io.que.ready := !count_valid || (io.in.valid && io.out.ready && last)
|
||||
|
||||
// Desired flow control results:
|
||||
assert (!io.out.fire() || (count_valid && io.in.fire())) // 1. out.fire => in.fire && count_valid
|
||||
assert (!(io.in.fire() && deliver) || io.out.fire()) // 2. in.fire && deliver => out.fire
|
||||
assert (!(io.out.fire() && last) || io.que.ready) // 3. out.fire && last => que.ready
|
||||
assert (!io.que.fire() || (!count_valid || io.out.fire())) // 4. que.fire => !count_valid || (out.fire && last)
|
||||
// Proofs:
|
||||
// 1. out.fire => out.ready && (count_valid && in.valid && deliver) => (count_valid && out.ready) && in.valid => in.fire
|
||||
// 2. in.fire && deliver => in.valid && count_valid && [(out.ready || !deliver) && deliver] =>
|
||||
// in.valid && count_valid && deliver && out.ready => out.fire
|
||||
// 3. out.fire && last => out.valid && out.ready && last => in.valid && out.ready && last => que.ready
|
||||
// 4. que.fire => que.valid && (!count_valid || (in.valid && out.ready && last))
|
||||
// => !count_valid || (count_valid && in.valid && out.ready && [last => deliver])
|
||||
// => !count_valid || (out.valid && out.ready && last)
|
||||
|
||||
when (io.que.fire()) {
|
||||
// Theorem 4 makes this safe; we overwrite garbage or last output
|
||||
count_valid := Bool(true)
|
||||
count_bits := io.que.bits
|
||||
multi_op := io.que.bits =/= UInt(0)
|
||||
} .elsewhen (io.in.fire()) {
|
||||
count_valid := !last // false => !que.valid
|
||||
count_bits := count_bits - UInt(1)
|
||||
// Proof: in.fire && [last => deliver] =2=> out.fire && last =3=> que.ready
|
||||
// !que.fire && que.ready => !que.valid
|
||||
}
|
||||
|
||||
// Safe by Theorem 1
|
||||
io.out.bits := io.in.bits
|
||||
io.out.bits.g_type := Mux(multi_op, Mux(in_get, Grant.getDataBlockType, Grant.putAckType), io.in.bits.g_type)
|
||||
}
|
||||
|
||||
class TileLinkFragmenter(depth: Int = 1)(implicit p: Parameters) extends TLModule()(p) {
|
||||
val io = new Bundle {
|
||||
val in = new ClientUncachedTileLinkIO().flip
|
||||
val out = new ClientUncachedTileLinkIO
|
||||
}
|
||||
|
||||
// TL2:
|
||||
// supportsAcquire = false
|
||||
// modify all outward managers to supportsMultibeat = true
|
||||
// assert: all managers must behaveFIFO (not inspect duplicated id field)
|
||||
|
||||
val source = Module(new TileLinkFragmenterSource)
|
||||
val sink = Module(new TileLinkFragmenterSink)
|
||||
sink.io.que <> Queue(source.io.que, depth)
|
||||
|
||||
source.io.in <> io.in.acquire
|
||||
io.out.acquire <> source.io.out
|
||||
sink.io.in <> io.out.grant
|
||||
io.in.grant <> sink.io.out
|
||||
}
|
||||
|
||||
object TileLinkFragmenter {
|
||||
// Pass the source/client to fragment
|
||||
def apply(source: ClientUncachedTileLinkIO, depth: Int = 1)(implicit p: Parameters): ClientUncachedTileLinkIO = {
|
||||
val fragmenter = Module(new TileLinkFragmenter(depth))
|
||||
fragmenter.io.in <> source
|
||||
fragmenter.io.out
|
||||
}
|
||||
}
|
||||
161
src/main/scala/uncore/devices/Bram.scala
Normal file
161
src/main/scala/uncore/devices/Bram.scala
Normal file
@@ -0,0 +1,161 @@
|
||||
package uncore.devices
|
||||
|
||||
import Chisel._
|
||||
import cde.{Parameters, Field}
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import uncore.util._
|
||||
import HastiConstants._
|
||||
|
||||
class BRAMSlave(depth: Int)(implicit val p: Parameters) extends Module
|
||||
with HasTileLinkParameters {
|
||||
val io = new ClientUncachedTileLinkIO().flip
|
||||
|
||||
// For TL2:
|
||||
// supportsAcquire = false
|
||||
// supportsMultibeat = false
|
||||
// supportsHint = false
|
||||
// supportsAtomic = false
|
||||
|
||||
// Timing-wise, we assume the input is coming out of registers
|
||||
// since you probably needed a TileLinkFragmenter infront of us
|
||||
|
||||
// Thus, only one pipeline stage: the grant result
|
||||
val g_valid = RegInit(Bool(false))
|
||||
val g_bits = Reg(new Grant)
|
||||
|
||||
// Just pass the pipeline straight through
|
||||
io.grant.valid := g_valid
|
||||
io.grant.bits := g_bits
|
||||
io.acquire.ready := !g_valid || io.grant.ready
|
||||
|
||||
val acq_get = io.acquire.bits.isBuiltInType(Acquire.getType)
|
||||
val acq_put = io.acquire.bits.isBuiltInType(Acquire.putType)
|
||||
val acq_addr = Cat(io.acquire.bits.addr_block, io.acquire.bits.addr_beat)
|
||||
|
||||
val bram = Mem(depth, Bits(width = tlDataBits))
|
||||
|
||||
val ren = acq_get && io.acquire.fire()
|
||||
val wen = acq_put && io.acquire.fire()
|
||||
|
||||
when (io.grant.fire()) {
|
||||
g_valid := Bool(false)
|
||||
}
|
||||
|
||||
when (io.acquire.fire()) {
|
||||
g_valid := Bool(true)
|
||||
g_bits := Grant(
|
||||
is_builtin_type = Bool(true),
|
||||
g_type = io.acquire.bits.getBuiltInGrantType(),
|
||||
client_xact_id = io.acquire.bits.client_xact_id,
|
||||
manager_xact_id = UInt(0),
|
||||
addr_beat = io.acquire.bits.addr_beat,
|
||||
data = UInt(0))
|
||||
}
|
||||
|
||||
when (wen) {
|
||||
bram.write(acq_addr, io.acquire.bits.data)
|
||||
assert(io.acquire.bits.wmask().andR, "BRAMSlave: partial write masks not supported")
|
||||
}
|
||||
io.grant.bits.data := RegEnable(bram.read(acq_addr), ren)
|
||||
}
|
||||
|
||||
// AHB-Lite (HASTI) slave backed by a byte-masked SeqMem, with a one-deep
// write buffer and read-under-write bypass. Zero wait states.
class HastiRAM(depth: Int)(implicit p: Parameters) extends HastiModule()(p) {
  val io = new HastiSlaveIO

  // Incoming write data split into bytes for the masked SeqMem write.
  val wdata = Vec.tabulate(hastiDataBytes)(i => io.hwdata(8*(i+1)-1,8*i))
  // Write address/size captured during the AHB address phase; the data
  // arrives one cycle later (AHB pipelining), hence the wvalid register.
  val waddr = Reg(UInt(width = hastiAddrBits))
  val wvalid = Reg(init = Bool(false))
  val wsize = Reg(UInt(width = SZ_HSIZE))
  val ram = SeqMem(depth, Vec(hastiDataBytes, Bits(width = 8)))

  // Byte mask derived from the transfer size and the address alignment.
  val max_size = log2Ceil(hastiDataBytes)
  val wmask_lut = MuxLookup(wsize, SInt(-1, hastiDataBytes).asUInt,
    (0 until max_size).map(sz => (UInt(sz) -> UInt((1 << (1 << sz)) - 1))))
  val wmask = (wmask_lut << waddr(max_size - 1, 0))(hastiDataBytes - 1, 0)

  val is_trans = io.hsel && io.htrans.isOneOf(HTRANS_NONSEQ, HTRANS_SEQ)
  val raddr = io.haddr >> UInt(max_size)
  val ren = is_trans && !io.hwrite
  // Set when a read targets the same row the buffered write will modify.
  val bypass = Reg(init = Bool(false))

  when (is_trans && io.hwrite) {
    waddr := io.haddr
    wsize := io.hsize
    wvalid := Bool(true)
  } .otherwise { wvalid := Bool(false) }

  when (ren) { bypass := wvalid && (waddr >> UInt(max_size)) === raddr }

  when (wvalid) {
    ram.write(waddr >> UInt(max_size), wdata, wmask.toBools)
  }

  // Forward buffered write bytes over stale RAM bytes when bypassing.
  val rdata = ram.read(raddr, ren)
  io.hrdata := Cat(rdata.zip(wmask.toBools).zip(wdata).map {
    case ((rbyte, wsel), wbyte) => Mux(wsel && bypass, wbyte, rbyte)
  }.reverse)

  // Always ready, never signals an error response.
  io.hready := Bool(true)
  io.hresp := HRESP_OKAY
}
|
||||
|
||||
/**
 * This RAM is not meant to be particularly performant.
 * It just supports the entire range of uncached TileLink operations in the
 * simplest way possible.
 */
class TileLinkTestRAM(depth: Int)(implicit val p: Parameters) extends Module
    with HasTileLinkParameters {
  val io = new ClientUncachedTileLinkIO().flip

  val ram = Mem(depth, UInt(width = tlDataBits))

  // High while a (possibly multibeat) grant is being returned.
  val responding = Reg(init = Bool(false))
  val acq = io.acquire.bits
  // Last beat of the request, latched so the grant phase can replay it.
  val r_acq = Reg(io.acquire.bits)
  val acq_addr = Cat(acq.addr_block, acq.addr_beat)
  val r_acq_addr = Cat(r_acq.addr_block, r_acq.addr_beat)

  when (io.acquire.fire() && io.acquire.bits.last()) {
    r_acq := io.acquire.bits
    responding := Bool(true)
  }

  when (io.grant.fire()) {
    // GetBlock responses step through every beat; all other built-in
    // types respond with a single beat.
    val is_getblk = r_acq.isBuiltInType(Acquire.getBlockType)
    val last_beat = r_acq.addr_beat === UInt(tlDataBeats - 1)
    when (is_getblk && !last_beat) {
      r_acq.addr_beat := r_acq.addr_beat + UInt(1)
    } .otherwise { responding := Bool(false) }
  }

  val old_data = ram(acq_addr)
  val new_data = acq.data
  // Atomics return the pre-modification value, captured here.
  val r_old_data = RegEnable(old_data, io.acquire.fire())

  io.acquire.ready := !responding
  io.grant.valid := responding
  io.grant.bits := Grant(
    is_builtin_type = Bool(true),
    g_type = r_acq.getBuiltInGrantType(),
    client_xact_id = r_acq.client_xact_id,
    manager_xact_id = UInt(0),
    addr_beat = r_acq.addr_beat,
    data = Mux(r_acq.isAtomic(), r_old_data, ram(r_acq_addr)))

  // Atomic ops operate on the operand-aligned slice of the beat.
  val amo_shift_bits = acq.amo_shift_bytes() << UInt(3)
  val amoalu = Module(new AMOALU(amoAluOperandBits, rhsIsAligned = true))
  amoalu.io.addr := Cat(acq.addr_block, acq.addr_beat, acq.addr_byte())
  amoalu.io.cmd := acq.op_code()
  amoalu.io.typ := acq.op_size()
  amoalu.io.lhs := old_data >> amo_shift_bits
  amoalu.io.rhs := new_data >> amo_shift_bits

  val result = Mux(acq.isAtomic(), amoalu.io.out << amo_shift_bits, new_data)
  val wmask = FillInterleaved(8, acq.wmask())

  // Writes (Puts and atomics) merge into the RAM under the byte mask.
  when (io.acquire.fire() && acq.hasData()) {
    ram(acq_addr) := (old_data & ~wmask) | (result & wmask)
  }
}
|
||||
1001
src/main/scala/uncore/devices/Debug.scala
Normal file
1001
src/main/scala/uncore/devices/Debug.scala
Normal file
File diff suppressed because it is too large
Load Diff
187
src/main/scala/uncore/devices/Plic.scala
Normal file
187
src/main/scala/uncore/devices/Plic.scala
Normal file
@@ -0,0 +1,187 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.devices
|
||||
|
||||
import Chisel._
|
||||
import Chisel.ImplicitConversions._
|
||||
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import cde.Parameters
|
||||
|
||||
// Handshake between an interrupt gateway and the PLIC core.
class GatewayPLICIO extends Bundle {
  val valid = Bool(OUTPUT)    // gateway posts a new interrupt
  val ready = Bool(INPUT)     // PLIC can accept a new interrupt
  val complete = Bool(INPUT)  // handler finished; gateway may re-arm
}
|
||||
|
||||
/** Converts a level-sensitive interrupt line into the PLIC's
  * post/complete handshake: once an interrupt has been posted, it is
  * masked until the handler signals completion.
  */
class LevelGateway extends Module {
  val io = new Bundle {
    val interrupt = Bool(INPUT)
    val plic = new GatewayPLICIO
  }

  // Tracks whether a posted interrupt is still being serviced.
  val pendingService = Reg(init = Bool(false))
  val posted = io.interrupt && io.plic.ready
  // `complete` takes priority over a same-cycle post, matching the
  // original code's last-connection ordering.
  pendingService := Mux(io.plic.complete, Bool(false), posted || pendingService)
  io.plic.valid := io.interrupt && !pendingService
}
|
||||
|
||||
/** Static configuration and address layout of the platform-level
  * interrupt controller.
  *
  * @param nHartsIn    number of harts attached to the PLIC
  * @param supervisor  whether each hart also has an S-mode context
  * @param nDevices    number of interrupt sources
  * @param nPriorities number of distinct priority levels (0 = fixed)
  */
case class PLICConfig(nHartsIn: Int, supervisor: Boolean, nDevices: Int, nPriorities: Int) {
  // Fixed address-map constants.
  def maxDevices = 1023
  def maxHarts = 15872
  def pendingBase = 0x1000
  def enableBase = 0x2000
  def hartBase = 0x200000
  def claimOffset = 4
  def priorityBytes = 4

  def contextsPerHart = if (supervisor) 2 else 1
  def nHarts = nHartsIn * contextsPerHart

  // Map a (hart, privilege-mode) pair onto a PLIC context index.
  def context(i: Int, mode: Char) = mode match {
    case 'M' => i * contextsPerHart
    case 'S' => require(supervisor); i * contextsPerHart + 1
  }

  // Per-context strides within each register region.
  def enableOffset(i: Int) = i * ((maxDevices + 7) / 8)
  def hartOffset(i: Int) = i * 0x1000

  def threshAddr(i: Int, mode: Char) = hartBase + hartOffset(context(i, mode))
  def claimAddr(i: Int, mode: Char) = threshAddr(i, mode) + claimOffset
  def enableAddr(i: Int, mode: Char) = enableBase + enableOffset(context(i, mode))
  def size = hartBase + hartOffset(maxHarts)

  // Sanity checks evaluated at construction, in the original order.
  require(hartBase >= enableBase + enableOffset(maxHarts))
  require(nDevices > 0 && nDevices <= maxDevices)
  require(nHarts > 0 && nHarts <= maxHarts)
  require(nPriorities >= 0 && nPriorities <= nDevices)
}
|
||||
|
||||
/** Platform-Level Interrupt Controller.
  *
  * Latches posted gateway interrupts, arbitrates the highest-priority
  * enabled pending device per hart, and exposes priority/pending/enable/
  * threshold/claim registers over a single-beat TileLink interface.
  */
class PLIC(val cfg: PLICConfig)(implicit val p: Parameters) extends Module
    with HasTileLinkParameters
    with HasAddrMapParameters {
  val io = new Bundle {
    val devices = Vec(cfg.nDevices, new GatewayPLICIO).flip
    val harts = Vec(cfg.nHarts, Bool()).asOutput
    val tl = new ClientUncachedTileLinkIO().flip
  }

  // Index 0 of these vectors is the reserved "no interrupt" device;
  // it is pinned to inert values at the bottom of this module.
  val priority =
    if (cfg.nPriorities > 0) Reg(Vec(cfg.nDevices+1, UInt(width=log2Up(cfg.nPriorities+1))))
    else Wire(init=Vec.fill(cfg.nDevices+1)(UInt(1)))
  val threshold =
    if (cfg.nPriorities > 0) Reg(Vec(cfg.nHarts, UInt(width = log2Up(cfg.nPriorities+1))))
    else Wire(init=Vec.fill(cfg.nHarts)(UInt(0)))
  val pending = Reg(init=Vec.fill(cfg.nDevices+1){Bool(false)})
  val enables = Reg(Vec(cfg.nHarts, Vec(cfg.nDevices+1, Bool())))

  // Latch gateway posts into the pending bits; `complete` defaults low
  // and is driven from the claim/complete register logic further down.
  for ((p, g) <- pending.tail zip io.devices) {
    g.ready := !p
    g.complete := false
    when (g.valid) { p := true }
  }

  // Tournament-tree maximum: returns (max value, index of the max).
  def findMax(x: Seq[UInt]): (UInt, UInt) = {
    if (x.length > 1) {
      val half = 1 << (log2Ceil(x.length) - 1)
      val lMax = findMax(x take half)
      val rMax = findMax(x drop half)
      val useLeft = lMax._1 >= rMax._1
      (Mux(useLeft, lMax._1, rMax._1), Mux(useLeft, lMax._2, UInt(half) + rMax._2))
    } else (x.head, UInt(0))
  }

  // Per hart: pick the highest-priority pending+enabled device. The
  // comparison key is {pending && enabled, priority}, so enabled pending
  // devices always beat idle ones. Results are registered (1-cycle lag).
  val maxDevs = Wire(Vec(cfg.nHarts, UInt(width = log2Up(pending.size))))
  for (hart <- 0 until cfg.nHarts) {
    val effectivePriority =
      for (((p, en), pri) <- (pending zip enables(hart) zip priority).tail)
        yield Cat(p && en, pri)
    val (maxPri, maxDev) = findMax((UInt(1) << priority(0).getWidth) +: effectivePriority)

    maxDevs(hart) := Reg(next = maxDev)
    io.harts(hart) := Reg(next = maxPri) > Cat(UInt(1), threshold(hart))
  }

  // Single-beat TileLink register interface: built-in Get/Put only.
  val acq = Queue(io.tl.acquire, 1)
  val read = acq.fire() && acq.bits.isBuiltInType(Acquire.getType)
  val write = acq.fire() && acq.bits.isBuiltInType(Acquire.putType)
  assert(!acq.fire() || read || write, "unsupported PLIC operation")
  val addr = acq.bits.full_addr()(log2Up(cfg.size)-1,0)

  // Which hart's context the access targets (from the address offset).
  val claimant =
    if (cfg.nHarts == 1) UInt(0)
    else (addr - cfg.hartBase)(log2Up(cfg.hartOffset(cfg.nHarts))-1,log2Up(cfg.hartOffset(1)))
  val hart = Wire(init = claimant)
  val myMaxDev = maxDevs(claimant)
  val myEnables = enables(hart)
  val rdata = Wire(init = UInt(0, tlDataBits))
  // Read-modify-write view of the addressed word for partial Puts.
  val masked_wdata = (acq.bits.data & acq.bits.full_wmask()) | (rdata & ~acq.bits.full_wmask())

  when (addr >= cfg.hartBase) {
    // Per-hart threshold and claim/complete registers.
    val word =
      if (tlDataBytes > cfg.claimOffset) UInt(0)
      else addr(log2Up(cfg.claimOffset),log2Up(tlDataBytes))
    rdata := Cat(myMaxDev, UInt(0, 8*cfg.priorityBytes-threshold(0).getWidth), threshold(claimant)) >> (word * tlDataBits)

    // Reading the claim register claims (clears pending for) the
    // currently highest-priority device.
    when (read && addr(log2Ceil(cfg.claimOffset))) {
      pending(myMaxDev) := false
    }
    when (write) {
      when (if (tlDataBytes > cfg.claimOffset) acq.bits.wmask()(cfg.claimOffset) else addr(log2Ceil(cfg.claimOffset))) {
        // Writing a device id to the claim register signals completion
        // back to that device's gateway (if it is enabled for this hart).
        val dev = (acq.bits.data >> ((8 * cfg.claimOffset) % tlDataBits))(log2Up(pending.size)-1,0)
        when (myEnables(dev)) { io.devices(dev-1).complete := true }
      }.otherwise {
        if (cfg.nPriorities > 0) threshold(claimant) := acq.bits.data
      }
    }
  }.elsewhen (addr >= cfg.enableBase) {
    // Per-hart enable bitmaps.
    val enableHart =
      if (cfg.nHarts > 1) (addr - cfg.enableBase)(log2Up(cfg.enableOffset(cfg.nHarts))-1,log2Up(cfg.enableOffset(1)))
      else UInt(0)
    hart := enableHart
    val word =
      if (tlDataBits >= cfg.nHarts) UInt(0)
      else addr(log2Up((cfg.nHarts+7)/8)-1,log2Up(tlDataBytes))
    for (i <- 0 until cfg.nHarts by tlDataBits) {
      when (word === i/tlDataBits) {
        rdata := Cat(myEnables.slice(i, i + tlDataBits).reverse)
        for (j <- 0 until (tlDataBits min (myEnables.size - i))) {
          when (write) { enables(enableHart)(i+j) := masked_wdata(j) }
        }
      }
    }
  }.elsewhen (addr >= cfg.pendingBase) {
    // Read-only view of the pending bits.
    val word =
      if (tlDataBytes >= pending.size) UInt(0)
      else addr(log2Up(pending.size)-1,log2Up(tlDataBytes))
    rdata := pending.asUInt >> (word * tlDataBits)
  }.otherwise {
    // Per-device priority registers (writable only if nPriorities > 0).
    val regsPerBeat = tlDataBytes >> log2Up(cfg.priorityBytes)
    val word =
      if (regsPerBeat >= priority.size) UInt(0)
      else addr(log2Up(priority.size*cfg.priorityBytes)-1,log2Up(tlDataBytes))
    for (i <- 0 until priority.size by regsPerBeat) {
      when (word === i/regsPerBeat) {
        rdata := Cat(priority.slice(i, i + regsPerBeat).map(p => Cat(UInt(0, 8*cfg.priorityBytes-p.getWidth), p)).reverse)
        for (j <- 0 until (regsPerBeat min (priority.size - i))) {
          if (cfg.nPriorities > 0) when (write) { priority(i+j) := masked_wdata >> (j * 8 * cfg.priorityBytes) }
        }
      }
    }
  }

  // Device 0 does not exist: pin its state so it can never win arbitration.
  priority(0) := 0
  pending(0) := false
  for (e <- enables)
    e(0) := false

  // Every queued acquire produces exactly one single-beat grant.
  io.tl.grant.valid := acq.valid
  acq.ready := io.tl.grant.ready
  io.tl.grant.bits := Grant(
    is_builtin_type = Bool(true),
    g_type = acq.bits.getBuiltInGrantType(),
    client_xact_id = acq.bits.client_xact_id,
    manager_xact_id = UInt(0),
    addr_beat = UInt(0),
    data = rdata)
}
|
||||
127
src/main/scala/uncore/devices/Prci.scala
Normal file
127
src/main/scala/uncore/devices/Prci.scala
Normal file
@@ -0,0 +1,127 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.devices
|
||||
|
||||
import Chisel._
|
||||
import Chisel.ImplicitConversions._
|
||||
import junctions._
|
||||
import junctions.NastiConstants._
|
||||
import uncore.tilelink._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
/** Number of tiles; sizes the per-tile Vecs in PRCI below. */
case object NTiles extends Field[Int]
|
||||
|
||||
// External interrupt sources routed through the PRCI to each tile.
class PRCIInterrupts extends Bundle {
  val meip = Bool()   // machine external interrupt pending
  val seip = Bool()   // supervisor external interrupt pending
  val debug = Bool()  // debug-module interrupt
}
|
||||
|
||||
// Per-tile bundle driven by the PRCI: reset, hart id, and interrupts.
class PRCITileIO(implicit p: Parameters) extends Bundle {
  val reset = Bool(OUTPUT)
  val id = UInt(OUTPUT, log2Up(p(NTiles)))
  // The PRCI adds the timer (mtip) and software (msip) interrupts it
  // generates itself to the externally supplied PRCIInterrupts.
  val interrupts = new PRCIInterrupts {
    val mtip = Bool()
    val msip = Bool()
  }.asOutput

  override def cloneType: this.type = new PRCITileIO().asInstanceOf[this.type]
}
|
||||
|
||||
/** Fixed byte-address layout of the PRCI register block. */
object PRCI {
  /** Bytes occupied by each software-interrupt (msip) register. */
  def msipBytes = 4
  /** Bytes occupied by each timer-compare (timecmp) register. */
  def timecmpBytes = 8
  /** Total size of the PRCI address region. */
  def size = 0xc000

  /** Byte offset of hart `hart`'s software-interrupt register. */
  def msip(hart: Int) = hart * msipBytes
  /** Byte offset of hart `hart`'s timer-compare register. */
  def timecmp(hart: Int) = 0x4000 + hart * timecmpBytes
  /** Byte offset of the shared time counter. */
  def time = 0xbff8
}
|
||||
|
||||
/** Power, Reset, Clock, Interrupt.
  *
  * Holds the shared real-time counter plus per-tile timecmp and IPI
  * registers, exposed over a single-beat TileLink interface, and fans
  * out interrupts (external, plus its own mtip/msip) to each tile.
  */
class PRCI(implicit val p: Parameters) extends Module
    with HasTileLinkParameters
    with HasAddrMapParameters {
  val io = new Bundle {
    val interrupts = Vec(p(NTiles), new PRCIInterrupts).asInput
    val tl = new ClientUncachedTileLinkIO().flip
    val tiles = Vec(p(NTiles), new PRCITileIO)
    val rtcTick = Bool(INPUT)
  }

  // Free-running 64-bit counter, incremented on rtcTick, plus one
  // timer-compare register per tile.
  val timeWidth = 64
  val timecmp = Reg(Vec(p(NTiles), UInt(width = timeWidth)))
  val time = Reg(init=UInt(0, timeWidth))
  when (io.rtcTick) { time := time + UInt(1) }

  // Software-interrupt (IPI) registers, one 32-bit word per tile.
  val ipi = Reg(init=Vec.fill(p(NTiles))(UInt(0, 32)))

  // Single-beat TileLink register interface; every queued acquire yields
  // exactly one grant carrying rdata.
  val acq = Queue(io.tl.acquire, 1)
  val addr = acq.bits.full_addr()(log2Ceil(PRCI.size)-1,0)
  val read = acq.bits.isBuiltInType(Acquire.getType)
  val rdata = Wire(init=UInt(0))
  io.tl.grant.valid := acq.valid
  acq.ready := io.tl.grant.ready
  io.tl.grant.bits := Grant(
    is_builtin_type = Bool(true),
    g_type = acq.bits.getBuiltInGrantType(),
    client_xact_id = acq.bits.client_xact_id,
    manager_xact_id = UInt(0),
    addr_beat = UInt(0),
    data = rdata)

  // Address decode: the time counter (read-only), then the timecmp
  // block, then the ipi words.
  when (addr(log2Floor(PRCI.time))) {
    require(log2Floor(PRCI.timecmp(p(NTiles)-1)) < log2Floor(PRCI.time))
    rdata := load(Vec(time + UInt(0)), acq.bits)
  }.elsewhen (addr >= PRCI.timecmp(0)) {
    rdata := store(timecmp, acq.bits)
  }.otherwise {
    // Only bit 0 of each 32-bit IPI word is kept on reads.
    rdata := store(ipi, acq.bits) & Fill(tlDataBits/32, UInt(1, 32))
  }

  // Fan interrupts and hart ids out to the tiles; mtip/msip are derived
  // from this module's own registers.
  for ((tile, i) <- io.tiles zipWithIndex) {
    tile.interrupts := io.interrupts(i)
    tile.interrupts.msip := ipi(i)(0)
    tile.interrupts.mtip := time >= timecmp(i)
    tile.id := UInt(i)
  }

  // TODO generalize these to help other TL slaves
  // Read a Vec of registers as memory-mapped words at the acquire's address.
  def load(v: Vec[UInt], acq: Acquire): UInt = {
    val w = v.head.getWidth
    val a = acq.full_addr()
    require(isPow2(w) && w >= 8)
    if (w > tlDataBits) {
      // Registers wider than a beat: select the register, then the beat.
      (v(a(log2Up(w/8*v.size)-1,log2Up(w/8))) >> a(log2Up(w/8)-1,log2Up(tlDataBytes)))(tlDataBits-1,0)
    } else {
      // Registers narrower than a beat: pack several per beat row.
      val row = for (i <- 0 until v.size by tlDataBits/w)
        yield Cat(v.slice(i, i + tlDataBits/w).reverse)
      if (row.size == 1) row.head
      else Vec(row)(a(log2Up(w/8*v.size)-1,log2Up(tlDataBytes)))
    }
  }

  // Write (on a Put, merged under the full write mask) and read back a
  // Vec of memory-mapped registers; returns the pre-write read data.
  def store(v: Vec[UInt], acq: Acquire): UInt = {
    val w = v.head.getWidth
    require(isPow2(w) && w >= 8)
    val a = acq.full_addr()
    val rdata = load(v, acq)
    val wdata = (acq.data & acq.full_wmask()) | (rdata & ~acq.full_wmask())
    if (w <= tlDataBits) {
      val word =
        if (tlDataBits/w >= v.size) UInt(0)
        else a(log2Up(w/8*v.size)-1,log2Up(tlDataBytes))
      for (i <- 0 until v.size) {
        when (acq.isBuiltInType(Acquire.putType) && word === i/(tlDataBits/w)) {
          // NOTE(review): `base` is never used — candidate for removal.
          val base = i % (tlDataBits/w)
          v(i) := wdata >> (w * (i % (tlDataBits/w)))
        }
      }
    } else {
      // Register wider than a beat: merge only the addressed beat slice.
      val i = a(log2Up(w/8*v.size)-1,log2Up(w/8))
      val mask = FillInterleaved(tlDataBits, UIntToOH(a(log2Up(w/8)-1,log2Up(tlDataBytes))))
      v(i) := (wdata & mask) | (v(i) & ~mask)
    }
    rdata
  }
}
|
||||
66
src/main/scala/uncore/devices/Rom.scala
Normal file
66
src/main/scala/uncore/devices/Rom.scala
Normal file
@@ -0,0 +1,66 @@
|
||||
package uncore.devices
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import uncore.util._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
// Read-only TileLink slave serving `contents`, supporting single-beat
// Get and full-block GetBlock acquires.
class ROMSlave(contents: Seq[Byte])(implicit val p: Parameters) extends Module
    with HasTileLinkParameters
    with HasAddrMapParameters {
  val io = new ClientUncachedTileLinkIO().flip

  val acq = Queue(io.acquire, 1)
  val single_beat = acq.bits.isBuiltInType(Acquire.getType)
  val multi_beat = acq.bits.isBuiltInType(Acquire.getBlockType)
  assert(!acq.valid || single_beat || multi_beat, "unsupported ROMSlave operation")

  // Beat counter for multibeat responses; reloaded by each new acquire
  // (last connection wins over the increment).
  val addr_beat = Reg(UInt())
  when (io.grant.fire()) { addr_beat := addr_beat + UInt(1) }
  when (io.acquire.fire()) { addr_beat := io.acquire.bits.addr_beat }

  // Pack the byte contents into beat-wide rows, first byte in the
  // least-significant position.
  val byteWidth = tlDataBits / 8
  val rows = (contents.size + byteWidth - 1)/byteWidth
  val rom = Vec.tabulate(rows) { i =>
    val slice = contents.slice(i*byteWidth, (i+1)*byteWidth)
    UInt(slice.foldRight(BigInt(0)) { case (x,y) => (y << 8) + (x.toInt & 0xFF) }, byteWidth*8)
  }
  val raddr = Cat(acq.bits.addr_block, addr_beat)
  val rdata = rom(if (rows == 1) UInt(0) else raddr(log2Up(rom.size)-1,0))

  // Hold the acquire in the queue until the final beat of its response
  // has been accepted.
  val last = !multi_beat || addr_beat === UInt(tlDataBeats-1)
  io.grant.valid := acq.valid
  acq.ready := io.grant.ready && last
  io.grant.bits := Grant(
    is_builtin_type = Bool(true),
    g_type = acq.bits.getBuiltInGrantType(),
    client_xact_id = acq.bits.client_xact_id,
    manager_xact_id = UInt(0),
    addr_beat = addr_beat,
    data = rdata)
}
|
||||
|
||||
// Read-only NASTI (AXI-like) slave serving `contents`; single-transfer
// reads only — bursts and writes are rejected by assertion.
class NastiROM(contents: Seq[Byte])(implicit p: Parameters) extends Module {
  val io = new NastiIO().flip
  val ar = Queue(io.ar, 1)

  // This assumes ROMs are in read-only parts of the address map.
  // Reuse b_queue code from NastiErrorSlave if this assumption is bad.
  when (ar.valid) { assert(ar.bits.len === UInt(0), "Can't burst-read from NastiROM") }
  assert(!(io.aw.valid || io.w.valid), "Can't write to NastiROM")
  io.aw.ready := Bool(false)
  io.w.ready := Bool(false)
  io.b.valid := Bool(false)

  // Pack the byte contents into bus-width rows, first byte in the
  // least-significant position.
  val byteWidth = io.r.bits.nastiXDataBits / 8
  val rows = (contents.size + byteWidth - 1)/byteWidth
  val rom = Vec.tabulate(rows) { i =>
    val slice = contents.slice(i*byteWidth, (i+1)*byteWidth)
    UInt(slice.foldRight(BigInt(0)) { case (x,y) => (y << 8) + (x.toInt & 0xFF) }, byteWidth*8)
  }
  val rdata = rom(if (rows == 1) UInt(0) else ar.bits.addr(log2Up(contents.size)-1,log2Up(byteWidth)))

  // Turn the queued address handshake directly into the read response.
  io.r <> ar
  io.r.bits := NastiReadDataChannel(ar.bits.id, rdata)
}
|
||||
196
src/main/scala/uncore/tilelink/Arbiters.scala
Normal file
196
src/main/scala/uncore/tilelink/Arbiters.scala
Normal file
@@ -0,0 +1,196 @@
|
||||
package uncore.tilelink
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
/** Utility functions for constructing TileLinkIO arbiters */
trait TileLinkArbiterLike extends HasTileLinkParameters {
  // Some shorthand type variables
  type ManagerSourcedWithId = ManagerToClientChannel with HasClientTransactionId
  type ClientSourcedWithId = ClientToManagerChannel with HasClientTransactionId
  type ClientSourcedWithIdAndData = ClientToManagerChannel with HasClientTransactionId with HasTileLinkData

  val arbN: Int // The number of ports on the client side

  // These abstract funcs are filled in depending on whether the arbiter mucks with the
  // outgoing client ids to track sourcing and then needs to revert them on the way back
  def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int): Bits
  def managerSourcedClientXactId(in: ManagerSourcedWithId): Bits
  def arbIdx(in: ManagerSourcedWithId): UInt

  // The following functions are all wiring helpers for each of the different types of TileLink channels

  // Arbitrate client->manager requests (with network headers), locking the
  // arbiter for all beats of a multibeat message and rewriting xact ids.
  def hookupClientSource[M <: ClientSourcedWithIdAndData](
      clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
      mngr: DecoupledIO[LogicalNetworkIO[M]]) {
    def hasData(m: LogicalNetworkIO[M]) = m.payload.hasMultibeatData()
    val arb = Module(new LockingRRArbiter(mngr.bits, arbN, tlDataBeats, Some(hasData _)))
    clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => {
      arb.valid := req.valid
      arb.bits := req.bits
      arb.bits.payload.client_xact_id := clientSourcedClientXactId(req.bits.payload, id)
      req.ready := arb.ready
    }}
    mngr <> arb.io.out
  }

  // Same as hookupClientSource, for headerless client channels.
  def hookupClientSourceHeaderless[M <: ClientSourcedWithIdAndData](
      clts: Seq[DecoupledIO[M]],
      mngr: DecoupledIO[M]) {
    def hasData(m: M) = m.hasMultibeatData()
    val arb = Module(new LockingRRArbiter(mngr.bits, arbN, tlDataBeats, Some(hasData _)))
    clts.zipWithIndex.zip(arb.io.in).map{ case ((req, id), arb) => {
      arb.valid := req.valid
      arb.bits := req.bits
      arb.bits.client_xact_id := clientSourcedClientXactId(req.bits, id)
      req.ready := arb.ready
    }}
    mngr <> arb.io.out
  }

  // Route a manager->client message to the port named by its network header.
  def hookupManagerSourceWithHeader[M <: ManagerToClientChannel](
      clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
      mngr: DecoupledIO[LogicalNetworkIO[M]]) {
    mngr.ready := Bool(false)
    for (i <- 0 until arbN) {
      clts(i).valid := Bool(false)
      when (mngr.bits.header.dst === UInt(i)) {
        clts(i).valid := mngr.valid
        mngr.ready := clts(i).ready
      }
      clts(i).bits := mngr.bits
    }
  }

  // Route by the port index recovered from the transaction id, restoring
  // the client's original xact id on the way back.
  def hookupManagerSourceWithId[M <: ManagerSourcedWithId](
      clts: Seq[DecoupledIO[LogicalNetworkIO[M]]],
      mngr: DecoupledIO[LogicalNetworkIO[M]]) {
    mngr.ready := Bool(false)
    for (i <- 0 until arbN) {
      clts(i).valid := Bool(false)
      when (arbIdx(mngr.bits.payload) === UInt(i)) {
        clts(i).valid := mngr.valid
        mngr.ready := clts(i).ready
      }
      clts(i).bits := mngr.bits
      clts(i).bits.payload.client_xact_id := managerSourcedClientXactId(mngr.bits.payload)
    }
  }

  // Headerless variant of hookupManagerSourceWithId.
  def hookupManagerSourceHeaderlessWithId[M <: ManagerSourcedWithId](
      clts: Seq[DecoupledIO[M]],
      mngr: DecoupledIO[M]) {
    mngr.ready := Bool(false)
    for (i <- 0 until arbN) {
      clts(i).valid := Bool(false)
      when (arbIdx(mngr.bits) === UInt(i)) {
        clts(i).valid := mngr.valid
        mngr.ready := clts(i).ready
      }
      clts(i).bits := mngr.bits
      clts(i).bits.client_xact_id := managerSourcedClientXactId(mngr.bits)
    }
  }

  // Fan a manager->client message out to every port; all must be ready.
  def hookupManagerSourceBroadcast[M <: Data](clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) {
    clts.map{ _.valid := mngr.valid }
    clts.map{ _.bits := mngr.bits }
    mngr.ready := clts.map(_.ready).reduce(_&&_)
  }

  // Round-robin arbitration for Finish messages (single-beat, no locking).
  def hookupFinish[M <: LogicalNetworkIO[Finish]]( clts: Seq[DecoupledIO[M]], mngr: DecoupledIO[M]) {
    val arb = Module(new RRArbiter(mngr.bits, arbN))
    arb.io.in <> clts
    mngr <> arb.io.out
  }
}
|
||||
|
||||
/** Abstract base case for any Arbiters that have UncachedTileLinkIOs */
abstract class UncachedTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module
    with TileLinkArbiterLike {
  val io = new Bundle {
    val in = Vec(arbN, new UncachedTileLinkIO).flip
    val out = new UncachedTileLinkIO
  }
  // Requests flow in->out through locking arbiters; grants are routed
  // back to the originating port by transaction id.
  hookupClientSource(io.in.map(_.acquire), io.out.acquire)
  hookupFinish(io.in.map(_.finish), io.out.finish)
  hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant)
}
|
||||
|
||||
/** Abstract base case for any Arbiters that have cached TileLinkIOs */
abstract class TileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module
    with TileLinkArbiterLike {
  val io = new Bundle {
    val in = Vec(arbN, new TileLinkIO).flip
    val out = new TileLinkIO
  }
  // Acquires and releases are arbitrated; probes are broadcast to every
  // port; grants are routed back by transaction id.
  hookupClientSource(io.in.map(_.acquire), io.out.acquire)
  hookupClientSource(io.in.map(_.release), io.out.release)
  hookupFinish(io.in.map(_.finish), io.out.finish)
  hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe)
  hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant)
}
|
||||
|
||||
/** Appends the port index of the arbiter to the client_xact_id */
trait AppendsArbiterId extends TileLinkArbiterLike {
  def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) =
    Cat(in.client_xact_id, UInt(id, log2Up(arbN)))
  // Strip the appended port index back off on the return path.
  def managerSourcedClientXactId(in: ManagerSourcedWithId) = {
    /* This shouldn't be necessary, but Chisel3 doesn't emit correct Verilog
     * when right shifting by too many bits. See
     * https://github.com/ucb-bar/firrtl/issues/69 */
    if (in.client_xact_id.getWidth > log2Up(arbN))
      in.client_xact_id >> log2Up(arbN)
    else
      UInt(0)
  }
  // The low-order bits select which client port the response returns to.
  def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id(log2Up(arbN)-1,0)
}
|
||||
|
||||
/** Uses the client_xact_id as is (assumes it has been set to port index) */
trait PassesId extends TileLinkArbiterLike {
  def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = in.client_xact_id
  def managerSourcedClientXactId(in: ManagerSourcedWithId) = in.client_xact_id
  // The id already is the port index, so it routes the response directly.
  def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id
}
|
||||
|
||||
/** Overwrites some default client_xact_id with the port idx */
trait UsesNewId extends TileLinkArbiterLike {
  def clientSourcedClientXactId(in: ClientSourcedWithId, id: Int) = UInt(id, log2Up(arbN))
  // The original id was discarded, so a constant zero is returned.
  def managerSourcedClientXactId(in: ManagerSourcedWithId) = UInt(0)
  def arbIdx(in: ManagerSourcedWithId) = in.client_xact_id
}
|
||||
|
||||
// Now we can mix-in the various id-generation traits to make concrete arbiter classes
class UncachedTileLinkIOArbiterThatAppendsArbiterId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with AppendsArbiterId
class UncachedTileLinkIOArbiterThatPassesId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with PassesId
class UncachedTileLinkIOArbiterThatUsesNewId(val n: Int)(implicit p: Parameters) extends UncachedTileLinkIOArbiter(n)(p) with UsesNewId
class TileLinkIOArbiterThatAppendsArbiterId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with AppendsArbiterId
class TileLinkIOArbiterThatPassesId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with PassesId
class TileLinkIOArbiterThatUsesNewId(val n: Int)(implicit p: Parameters) extends TileLinkIOArbiter(n)(p) with UsesNewId
|
||||
|
||||
/** Concrete uncached client-side arbiter that appends the arbiter's port id to client_xact_id */
class ClientUncachedTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module with TileLinkArbiterLike with AppendsArbiterId {
  val io = new Bundle {
    val in = Vec(arbN, new ClientUncachedTileLinkIO).flip
    val out = new ClientUncachedTileLinkIO
  }
  // The degenerate single-port case needs no arbitration logic at all.
  if (arbN > 1) {
    hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire)
    hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant)
  } else { io.out <> io.in.head }
}
|
||||
|
||||
/** Concrete client-side arbiter that appends the arbiter's port id to client_xact_id */
class ClientTileLinkIOArbiter(val arbN: Int)(implicit val p: Parameters) extends Module with TileLinkArbiterLike with AppendsArbiterId {
  val io = new Bundle {
    val in = Vec(arbN, new ClientTileLinkIO).flip
    val out = new ClientTileLinkIO
  }
  // The degenerate single-port case needs no arbitration logic at all.
  if (arbN > 1) {
    hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire)
    hookupClientSourceHeaderless(io.in.map(_.release), io.out.release)
    hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe)
    hookupManagerSourceHeaderlessWithId(io.in.map(_.grant), io.out.grant)
  } else { io.out <> io.in.head }
}
|
||||
971
src/main/scala/uncore/tilelink/Definitions.scala
Normal file
971
src/main/scala/uncore/tilelink/Definitions.scala
Normal file
@@ -0,0 +1,971 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.tilelink
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import uncore.coherence.CoherencePolicy
|
||||
import uncore.util._
|
||||
import scala.math.max
|
||||
import uncore.constants._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
case object CacheBlockOffsetBits extends Field[Int]
|
||||
case object AmoAluOperandBits extends Field[Int]
|
||||
|
||||
case object TLId extends Field[String]
|
||||
case class TLKey(id: String) extends Field[TileLinkParameters]
|
||||
|
||||
/** Parameters exposed to the top-level design, set based on
  * external requirements or design space exploration
  *
  * Coherency policy used to define custom message types
  * Number of manager agents
  * Number of client agents that cache data and use custom [[uncore.Acquire]] types
  * Number of client agents that do not cache data and use built-in [[uncore.Acquire]] types
  * Maximum number of unique outstanding transactions per client
  * Maximum number of clients multiplexed onto a single port
  * Maximum number of unique outstanding transactions per manager
  * Width of cache block addresses
  * Total amount of data per cache block
  * Number of data beats per cache block
  **/

case class TileLinkParameters(
    coherencePolicy: CoherencePolicy,
    nManagers: Int,
    nCachingClients: Int,
    nCachelessClients: Int,
    maxClientXacts: Int,
    maxClientsPerPort: Int,
    maxManagerXacts: Int,
    dataBits: Int,
    dataBeats: Int = 4,
    overrideDataBitsPerBeat: Option[Int] = None
    ) {
  val nClients = nCachingClients + nCachelessClients
  // One mask bit per byte of a beat, rounded up.
  val writeMaskBits: Int = ((dataBits / dataBeats) - 1) / 8 + 1
  // Beat width; normally dataBits/dataBeats unless explicitly overridden.
  val dataBitsPerBeat: Int = overrideDataBitsPerBeat.getOrElse(dataBits / dataBeats)
}
|
||||
|
||||
|
||||
/** Utility trait for building Modules and Bundles that use TileLink parameters */
trait HasTileLinkParameters {
  implicit val p: Parameters
  // Link configuration selected by the TLId string in the parameter space.
  val tlExternal = p(TLKey(p(TLId)))
  val tlCoh = tlExternal.coherencePolicy
  val tlNManagers = tlExternal.nManagers
  val tlNCachingClients = tlExternal.nCachingClients
  val tlNCachelessClients = tlExternal.nCachelessClients
  val tlNClients = tlExternal.nClients
  val tlClientIdBits = log2Up(tlNClients)
  val tlManagerIdBits = log2Up(tlNManagers)
  val tlMaxClientXacts = tlExternal.maxClientXacts
  val tlMaxClientsPerPort = tlExternal.maxClientsPerPort
  val tlMaxManagerXacts = tlExternal.maxManagerXacts
  val tlClientXactIdBits = log2Up(tlMaxClientXacts*tlMaxClientsPerPort)
  val tlManagerXactIdBits = log2Up(tlMaxManagerXacts)
  val tlBlockAddrBits = p(PAddrBits) - p(CacheBlockOffsetBits)
  // Beat geometry of the link.
  val tlDataBeats = tlExternal.dataBeats
  val tlDataBits = tlExternal.dataBitsPerBeat
  val tlDataBytes = tlDataBits/8
  val tlWriteMaskBits = tlExternal.writeMaskBits
  val tlBeatAddrBits = log2Up(tlDataBeats)
  val tlByteAddrBits = log2Up(tlWriteMaskBits)
  val tlMemoryOpcodeBits = M_SZ
  val tlMemoryOperandSizeBits = log2Ceil(log2Ceil(tlWriteMaskBits) + 1)
  // Message-type fields must fit both the built-in message types and
  // any coherence-policy-defined ones.
  val tlAcquireTypeBits = max(log2Up(Acquire.nBuiltInTypes),
                              tlCoh.acquireTypeWidth)
  val tlAcquireUnionBits = max(tlWriteMaskBits,
                               (tlByteAddrBits +
                                 tlMemoryOperandSizeBits +
                                 tlMemoryOpcodeBits)) + 1
  val tlGrantTypeBits = max(log2Up(Grant.nBuiltInTypes),
                            tlCoh.grantTypeWidth) + 1
  /** Whether the underlying physical network preserved point-to-point ordering of messages */
  val tlNetworkPreservesPointToPointOrdering = false
  val tlNetworkDoesNotInterleaveBeats = true
  val amoAluOperandBits = p(AmoAluOperandBits)
  val amoAluOperandBytes = amoAluOperandBits/8
}
|
||||
|
||||
/** Base class for Modules parameterized by a TileLink network. */
abstract class TLModule(implicit val p: Parameters) extends Module
  with HasTileLinkParameters
/** Base class for Bundles parameterized by a TileLink network. */
abstract class TLBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p)
  with HasTileLinkParameters
|
||||
|
||||
/** Base trait for all TileLink channels */
abstract class TileLinkChannel(implicit p: Parameters) extends TLBundle()(p) {
  // Whether this particular message carries any data payload.
  def hasData(dummy: Int = 0): Bool
  // Whether this message's payload spans multiple beats.
  def hasMultibeatData(dummy: Int = 0): Bool
}
|
||||
/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
abstract class ClientToManagerChannel(implicit p: Parameters) extends TileLinkChannel()(p)
/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
abstract class ManagerToClientChannel(implicit p: Parameters) extends TileLinkChannel()(p)
/** Directionality of message channel. Used to hook up logical network ports to physical network ports */
abstract class ClientToClientChannel(implicit p: Parameters) extends TileLinkChannel()(p) // Unused for now
|
||||
|
||||
/** Common signals that are used in multiple channels.
  * These traits are useful for type parameterizing bundle wiring functions.
  */
|
||||
|
||||
/** Address of a cache block. */
trait HasCacheBlockAddress extends HasTileLinkParameters {
  val addr_block = UInt(width = tlBlockAddrBits)

  // True when two messages refer to the same cache block.
  def conflicts(that: HasCacheBlockAddress) = this.addr_block === that.addr_block
  def conflicts(addr: UInt) = this.addr_block === addr
}
|
||||
|
||||
/** Sub-block address or beat id of multi-beat data */
trait HasTileLinkBeatId extends HasTileLinkParameters {
  val addr_beat = UInt(width = tlBeatAddrBits)
}
|
||||
|
||||
/** Client-side transaction id. Usually Miss Status Handling Register File index */
trait HasClientTransactionId extends HasTileLinkParameters {
  val client_xact_id = Bits(width = tlClientXactIdBits)
}
|
||||
|
||||
/** Manager-side transaction id. Usually Transaction Status Handling Register File index. */
trait HasManagerTransactionId extends HasTileLinkParameters {
  val manager_xact_id = Bits(width = tlManagerXactIdBits)
}
|
||||
|
||||
/** A single beat of cache block data */
trait HasTileLinkData extends HasTileLinkBeatId {
  val data = UInt(width = tlDataBits)

  def hasData(dummy: Int = 0): Bool
  def hasMultibeatData(dummy: Int = 0): Bool
  // First/last beat of a (possibly multi-beat) message; single-beat messages
  // are trivially both first and last.
  def first(dummy: Int = 0): Bool = !hasMultibeatData() || addr_beat === UInt(0)
  def last(dummy: Int = 0): Bool = !hasMultibeatData() || addr_beat === UInt(tlDataBeats-1)
}
|
||||
|
||||
/** An entire cache block of data */
trait HasTileLinkBlock extends HasTileLinkParameters {
  // One entry per beat; wmask_buffer holds the per-byte write mask of each beat.
  val data_buffer = Vec(tlDataBeats, UInt(width = tlDataBits))
  val wmask_buffer = Vec(tlDataBeats, UInt(width = tlWriteMaskBits))
}
|
||||
|
||||
/** The id of a client source or destination. Used in managers. */
trait HasClientId extends HasTileLinkParameters {
  val client_id = UInt(width = tlClientIdBits)
}
|
||||
|
||||
/** The id of a manager source or destination. */
trait HasManagerId extends HasTileLinkParameters {
  val manager_id = UInt(width = tlManagerIdBits)
}
|
||||
|
||||
/** Type-dependent union of extra Acquire fields (write mask, or byte address /
  * operand size / opcode), packed by [[Acquire.makeUnion]]. The offsets below
  * must stay consistent with that packing.
  */
trait HasAcquireUnion extends HasTileLinkParameters {
  val union = Bits(width = tlAcquireUnionBits)

  // Utility funcs for accessing subblock union:
  def isBuiltInType(t: UInt): Bool
  // Bit offsets of the union subfields (bit 0 is the allocation hint).
  val opCodeOff = 1
  val opSizeOff = tlMemoryOpcodeBits + opCodeOff
  val addrByteOff = tlMemoryOperandSizeBits + opSizeOff
  val addrByteMSB = tlByteAddrBits + addrByteOff
  /** Hint whether to allocate the block in any intervening caches */
  def allocate(dummy: Int = 0) = union(0)
  /** Op code for [[uncore.PutAtomic]] operations; Puts are always plain writes */
  def op_code(dummy: Int = 0) = Mux(
    isBuiltInType(Acquire.putType) || isBuiltInType(Acquire.putBlockType),
    M_XWR, union(opSizeOff-1, opCodeOff))
  /** Operand size for [[uncore.PutAtomic]] */
  def op_size(dummy: Int = 0) = union(addrByteOff-1, opSizeOff)
  /** Byte address for [[uncore.PutAtomic]] operand */
  def addr_byte(dummy: Int = 0) = union(addrByteMSB-1, addrByteOff)
  // Which amo-operand-sized slot within the beat the operand occupies.
  def amo_offset(dummy: Int = 0) =
    if (tlByteAddrBits > log2Up(amoAluOperandBytes)) addr_byte()(tlByteAddrBits-1, log2Up(amoAluOperandBytes))
    else UInt(0)
  /** Bit offset of [[uncore.PutAtomic]] operand */
  def amo_shift_bytes(dummy: Int = 0) = UInt(amoAluOperandBytes)*amo_offset()
  /** Write mask for [[uncore.Put]], [[uncore.PutBlock]], [[uncore.PutAtomic]] */
  def wmask(dummy: Int = 0): UInt = {
    val is_amo = isBuiltInType(Acquire.putAtomicType)
    // Atomics implicitly write exactly the operand-sized slot they target.
    val amo_mask = if (tlByteAddrBits > log2Up(amoAluOperandBytes))
                     FillInterleaved(amoAluOperandBytes, UIntToOH(amo_offset()))
                   else Acquire.fullWriteMask
    val is_put = isBuiltInType(Acquire.putBlockType) || isBuiltInType(Acquire.putType)
    // Puts carry an explicit mask in the union, just above the alloc bit.
    val put_mask = union(tlWriteMaskBits, 1)
    Mux(is_amo, amo_mask, Mux(is_put, put_mask, UInt(0)))
  }
  /** Full, beat-sized writemask */
  def full_wmask(dummy: Int = 0) = FillInterleaved(8, wmask())

  /** Does this message write only part of a beat (non-full write mask)? */
  def hasPartialWritemask(dummy: Int = 0): Bool = wmask() =/= Acquire.fullWriteMask

}
|
||||
|
||||
/** Discriminates built-in vs. coherence-policy-defined Acquire types and
  * provides predicates over the built-in type space.
  */
trait HasAcquireType extends HasTileLinkParameters {
  val is_builtin_type = Bool()
  val a_type = UInt(width = tlAcquireTypeBits)

  /** Message type equality */
  def is(t: UInt) = a_type === t //TODO: make this more opaque; def ===?

  /** Is this message a built-in or custom type */
  def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
  /** Is this message a particular built-in type */
  def isBuiltInType(t: UInt): Bool = is_builtin_type && a_type === t

  /** Does this message refer to subblock operands using info in the Acquire.union subbundle */
  def isSubBlockType(dummy: Int = 0): Bool = isBuiltInType() && a_type.isOneOf(Acquire.typesOnSubBlocks)

  /** Is this message a built-in prefetch message */
  def isPrefetch(dummy: Int = 0): Bool = isBuiltInType() &&
                                           (is(Acquire.getPrefetchType) || is(Acquire.putPrefetchType))

  /** Is this message a built-in atomic message */
  def isAtomic(dummy: Int = 0): Bool = isBuiltInType() && is(Acquire.putAtomicType)

  /** Is this message a built-in read message */
  def isGet(dummy: Int = 0): Bool = isBuiltInType() && (is(Acquire.getType) || is(Acquire.getBlockType))

  /** Does this message contain data? Assumes that no custom message types have data. */
  def hasData(dummy: Int = 0): Bool = isBuiltInType() && a_type.isOneOf(Acquire.typesWithData)

  /** Does this message contain multiple beats of data? Assumes that no custom message types have data. */
  def hasMultibeatData(dummy: Int = 0): Bool = Bool(tlDataBeats > 1) && isBuiltInType() &&
                                                 a_type.isOneOf(Acquire.typesWithMultibeatData)

  /** Mapping between each built-in Acquire type and a built-in Grant type. */
  def getBuiltInGrantType(dummy: Int = 0): UInt = Acquire.getBuiltInGrantType(this.a_type)
}
|
||||
|
||||
/** Coherence-policy-defined Probe type. Probes never carry data. */
trait HasProbeType extends HasTileLinkParameters {
  val p_type = UInt(width = tlCoh.probeTypeWidth)

  def is(t: UInt) = p_type === t
  def hasData(dummy: Int = 0) = Bool(false)
  def hasMultibeatData(dummy: Int = 0) = Bool(false)
}
|
||||
|
||||
/** Messages that may be sent unsolicited (e.g. voluntary writebacks). */
trait MightBeVoluntary {
  def isVoluntary(dummy: Int = 0): Bool
}
|
||||
|
||||
/** Coherence-policy-defined Release type plus the voluntary-writeback flag. */
trait HasReleaseType extends HasTileLinkParameters with MightBeVoluntary {
  val voluntary = Bool()
  val r_type = UInt(width = tlCoh.releaseTypeWidth)

  def is(t: UInt) = r_type === t
  def hasData(dummy: Int = 0) = r_type.isOneOf(tlCoh.releaseTypesWithData)
  def hasMultibeatData(dummy: Int = 0) = Bool(tlDataBeats > 1) &&
                                           r_type.isOneOf(tlCoh.releaseTypesWithData)
  def isVoluntary(dummy: Int = 0) = voluntary
  // Voluntary releases only need a Grant ack when the network can reorder messages.
  def requiresAck(dummy: Int = 0) = !Bool(tlNetworkPreservesPointToPointOrdering)
}
|
||||
|
||||
/** Discriminates built-in vs. coherence-policy-defined Grant types. */
trait HasGrantType extends HasTileLinkParameters with MightBeVoluntary {
  val is_builtin_type = Bool()
  val g_type = UInt(width = tlGrantTypeBits)

  // Helper funcs
  def isBuiltInType(dummy: Int = 0): Bool = is_builtin_type
  def isBuiltInType(t: UInt): Bool = is_builtin_type && g_type === t
  def is(t: UInt):Bool = g_type === t
  // Which type space (built-in vs. policy) determines whether data is attached.
  def hasData(dummy: Int = 0): Bool = Mux(isBuiltInType(),
                                        g_type.isOneOf(Grant.typesWithData),
                                        g_type.isOneOf(tlCoh.grantTypesWithData))
  def hasMultibeatData(dummy: Int = 0): Bool =
    Bool(tlDataBeats > 1) && Mux(isBuiltInType(),
                               g_type.isOneOf(Grant.typesWithMultibeatData),
                               g_type.isOneOf(tlCoh.grantTypesWithData))
  // A voluntary-ack Grant acknowledges a voluntary Release.
  def isVoluntary(dummy: Int = 0): Bool = isBuiltInType() && (g_type === Grant.voluntaryAckType)
  // Grants require a Finish ack unless the network preserves ordering,
  // and voluntary acks are never Finished.
  def requiresAck(dummy: Int = 0): Bool = !Bool(tlNetworkPreservesPointToPointOrdering) && !isVoluntary()
}
|
||||
|
||||
/** TileLink channel bundle definitions */

/** The Acquire channel is used to initiate coherence protocol transactions in
  * order to gain access to a cache block's data with certain permissions
  * enabled. Messages sent over this channel may be custom types defined by
  * a [[uncore.CoherencePolicy]] for cached data accesses or may be built-in types
  * used for uncached data accesses. Acquires may contain data for Put or
  * PutAtomic built-in types. After sending an Acquire, clients must
  * wait for a manager to send them a [[uncore.Grant]] message in response.
  */
class AcquireMetadata(implicit p: Parameters) extends ClientToManagerChannel
    with HasCacheBlockAddress
    with HasClientTransactionId
    with HasTileLinkBeatId
    with HasAcquireType
    with HasAcquireUnion {
  /** Complete physical address for block, beat or operand */
  def full_addr(dummy: Int = 0) =
    Cat(this.addr_block, this.addr_beat,
      // Only types that carry a byte address contribute the low bits.
      Mux(isBuiltInType() && this.a_type.isOneOf(Acquire.typesWithAddrByte),
        this.addr_byte(), UInt(0, tlByteAddrBits)))
}
|
||||
|
||||
/** [[uncore.AcquireMetadata]] with an extra field containing the data beat */
class Acquire(implicit p: Parameters) extends AcquireMetadata
  with HasTileLinkData

/** [[uncore.AcquireMetadata]] with an extra field containing the entire cache block */
class BufferedAcquire(implicit p: Parameters) extends AcquireMetadata
  with HasTileLinkBlock

/** [[uncore.Acquire]] with an extra field stating its source id */
class AcquireFromSrc(implicit p: Parameters) extends Acquire
  with HasClientId

/** [[uncore.BufferedAcquire]] with an extra field stating its source id */
class BufferedAcquireFromSrc(implicit p: Parameters) extends BufferedAcquire
  with HasClientId
|
||||
/** Used to track metadata for transactions where multiple secondary misses have been merged
  * and handled by a single transaction tracker.
  */
class SecondaryMissInfo(implicit p: Parameters) extends TLBundle
  with HasClientTransactionId
  with HasTileLinkBeatId
  with HasClientId
  with HasAcquireType
|
||||
/** Contains definitions of the built-in Acquire types and a factory
  * for [[uncore.Acquire]]
  *
  * In general you should avoid using this factory directly and use
  * [[uncore.ClientMetadata.makeAcquire]] for custom cached Acquires and
  * [[uncore.Get]], [[uncore.Put]], etc. for built-in uncached Acquires.
  *
  * @param is_builtin_type built-in or custom type message?
  * @param a_type built-in type enum or custom type enum
  * @param client_xact_id client's transaction id
  * @param addr_block address of the cache block
  * @param addr_beat sub-block address (which beat)
  * @param data data being put outwards
  * @param union additional fields used for uncached types
  */
object Acquire {
  val nBuiltInTypes = 7
  //TODO: Use Enum
  def getType         = UInt("b000") // Get a single beat of data
  def getBlockType    = UInt("b001") // Get a whole block of data
  def putType         = UInt("b010") // Put a single beat of data
  def putBlockType    = UInt("b011") // Put a whole block of data
  def putAtomicType   = UInt("b100") // Perform an atomic memory op
  def getPrefetchType = UInt("b101") // Prefetch a whole block of data
  def putPrefetchType = UInt("b110") // Prefetch a whole block of data, with intent to write
  // Type classifications consumed by the HasAcquireType predicates.
  def typesWithData = Vec(putType, putBlockType, putAtomicType)
  def typesWithMultibeatData = Vec(putBlockType)
  def typesOnSubBlocks = Vec(putType, getType, putAtomicType)
  def typesWithAddrByte = Vec(getType, putAtomicType)

  /** Mapping between each built-in Acquire type and a built-in Grant type. */
  def getBuiltInGrantType(a_type: UInt): UInt = {
    MuxLookup(a_type, Grant.putAckType, Array(
      Acquire.getType       -> Grant.getDataBeatType,
      Acquire.getBlockType  -> Grant.getDataBlockType,
      Acquire.putType       -> Grant.putAckType,
      Acquire.putBlockType  -> Grant.putAckType,
      Acquire.putAtomicType -> Grant.getDataBeatType,
      Acquire.getPrefetchType -> Grant.prefetchAckType,
      Acquire.putPrefetchType -> Grant.prefetchAckType))
  }

  /** Pack the type-dependent subfields into the union bitfield. The Cat
    * order here must stay consistent with the bit offsets defined in
    * [[HasAcquireUnion]] (alloc at bit 0, then opcode, size, byte address).
    */
  def makeUnion(
        a_type: UInt,
        addr_byte: UInt,
        operand_size: UInt,
        opcode: UInt,
        wmask: UInt,
        alloc: Bool)
        (implicit p: Parameters): UInt = {

    val tlExternal = p(TLKey(p(TLId)))
    val tlWriteMaskBits = tlExternal.writeMaskBits
    val tlByteAddrBits = log2Up(tlWriteMaskBits)
    val tlMemoryOperandSizeBits = log2Ceil(log2Ceil(tlWriteMaskBits) + 1)

    // These had better be the right size when we cat them together!
    val my_addr_byte = (UInt(0, tlByteAddrBits) | addr_byte)(tlByteAddrBits-1, 0)
    val my_operand_size = (UInt(0, tlMemoryOperandSizeBits) | operand_size)(tlMemoryOperandSizeBits-1, 0)
    val my_opcode = (UInt(0, M_SZ) | opcode)(M_SZ-1, 0)
    val my_wmask = (UInt(0, tlWriteMaskBits) | wmask)(tlWriteMaskBits-1, 0)

    MuxLookup(a_type, UInt(0), Array(
      Acquire.getType       -> Cat(my_addr_byte, my_operand_size, my_opcode, alloc),
      Acquire.getBlockType  -> Cat(my_operand_size, my_opcode, alloc),
      Acquire.putType       -> Cat(my_wmask, alloc),
      Acquire.putBlockType  -> Cat(my_wmask, alloc),
      Acquire.putAtomicType -> Cat(my_addr_byte, my_operand_size, my_opcode, alloc),
      Acquire.getPrefetchType -> Cat(M_XRD, alloc),
      Acquire.putPrefetchType -> Cat(M_XWR, alloc)))
  }

  // All-ones write mask covering a full beat.
  def fullWriteMask(implicit p: Parameters) = SInt(-1, width = p(TLKey(p(TLId))).writeMaskBits).asUInt
  // Encoded operand size denoting a full beat.
  def fullOperandSize(implicit p: Parameters) = {
    val dataBits = p(TLKey(p(TLId))).dataBitsPerBeat
    UInt(log2Ceil(dataBits / 8))
  }

  // Most generic constructor
  def apply(
        is_builtin_type: Bool,
        a_type: Bits,
        client_xact_id: UInt,
        addr_block: UInt,
        addr_beat: UInt = UInt(0),
        data: UInt = UInt(0),
        union: UInt = UInt(0))
        (implicit p: Parameters): Acquire = {
    val acq = Wire(new Acquire)
    acq.is_builtin_type := is_builtin_type
    acq.a_type := a_type
    acq.client_xact_id := client_xact_id
    acq.addr_block := addr_block
    acq.addr_beat := addr_beat
    acq.data := data
    acq.union := union
    acq
  }

  // Copy constructor
  def apply(a: Acquire): Acquire = {
    val acq = Wire(new Acquire()(a.p))
    acq := a
    acq
  }
}
|
||||
|
||||
/** Factory for built-in [[Acquire]] messages: packs the type-dependent
  * subfields into the union before delegating to the generic constructor.
  */
object BuiltInAcquireBuilder {
  def apply(
        a_type: UInt,
        client_xact_id: UInt,
        addr_block: UInt,
        addr_beat: UInt = UInt(0),
        data: UInt = UInt(0),
        addr_byte: UInt = UInt(0),
        operand_size: UInt = UInt(0),
        opcode: UInt = UInt(0),
        wmask: UInt = UInt(0),
        alloc: Bool = Bool(true))
        (implicit p: Parameters): Acquire = {
    // Fold byte address, operand size, opcode, mask and alloc hint into the
    // union bitfield ahead of construction.
    val packedUnion = Acquire.makeUnion(a_type, addr_byte, operand_size, opcode, wmask, alloc)
    Acquire(
      is_builtin_type = Bool(true),
      a_type = a_type,
      client_xact_id = client_xact_id,
      addr_block = addr_block,
      addr_beat = addr_beat,
      data = data,
      union = packedUnion)
  }
}
|
||||
|
||||
/** Get a single beat of data from the outer memory hierarchy
  *
  * The client can hint whether the block containing this beat should be
  * allocated in the intervening levels of the hierarchy.
  *
  * @param client_xact_id client's transaction id
  * @param addr_block address of the cache block
  * @param addr_beat sub-block address (which beat)
  * @param addr_byte sub-block address (which byte)
  * @param operand_size {byte, half, word, double} from [[uncore.MemoryOpConstants]]
  * @param alloc hint whether the block should be allocated in intervening caches
  */
object Get {
  // Whole-beat Get: operand is the full beat.
  def apply(
        client_xact_id: UInt,
        addr_block: UInt,
        addr_beat: UInt,
        alloc: Bool = Bool(true))
        (implicit p: Parameters): Acquire = {
    BuiltInAcquireBuilder(
      a_type = Acquire.getType,
      client_xact_id = client_xact_id,
      addr_block = addr_block,
      addr_beat = addr_beat,
      operand_size = Acquire.fullOperandSize,
      opcode = M_XRD,
      alloc = alloc)
  }
  // Sub-beat Get: caller specifies byte address and operand size.
  def apply(
        client_xact_id: UInt,
        addr_block: UInt,
        addr_beat: UInt,
        addr_byte: UInt,
        operand_size: UInt,
        alloc: Bool)
        (implicit p: Parameters): Acquire = {
    BuiltInAcquireBuilder(
      a_type = Acquire.getType,
      client_xact_id = client_xact_id,
      addr_block = addr_block,
      addr_beat = addr_beat,
      addr_byte = addr_byte,
      operand_size = operand_size,
      opcode = M_XRD,
      alloc = alloc)
  }
}
|
||||
|
||||
/** Get a whole cache block of data from the outer memory hierarchy
  *
  * The client can hint whether the block should be allocated in the
  * intervening levels of the hierarchy.
  *
  * @param client_xact_id client's transaction id
  * @param addr_block address of the cache block
  * @param alloc hint whether the block should be allocated in intervening caches
  */
object GetBlock {
  def apply(
        client_xact_id: UInt = UInt(0),
        addr_block: UInt,
        alloc: Bool = Bool(true))
        (implicit p: Parameters): Acquire =
    // A block read is a full-operand M_XRD built-in Acquire.
    BuiltInAcquireBuilder(
      a_type = Acquire.getBlockType,
      addr_block = addr_block,
      client_xact_id = client_xact_id,
      opcode = M_XRD,
      operand_size = Acquire.fullOperandSize,
      alloc = alloc)
}
|
||||
|
||||
/** Prefetch a cache block into the next-outermost level of the memory hierarchy
  * with read permissions.
  *
  * @param client_xact_id client's transaction id
  * @param addr_block address of the cache block
  */
object GetPrefetch {
  def apply(
        client_xact_id: UInt,
        addr_block: UInt)
        (implicit p: Parameters): Acquire =
    // Prefetches carry no data or subfields beyond the block address.
    BuiltInAcquireBuilder(
      addr_block = addr_block,
      client_xact_id = client_xact_id,
      a_type = Acquire.getPrefetchType)
}
|
||||
|
||||
/** Put a single beat of data into the outer memory hierarchy
  *
  * The block will be allocated in the next-outermost level of the hierarchy.
  *
  * @param client_xact_id client's transaction id
  * @param addr_block address of the cache block
  * @param addr_beat sub-block address (which beat)
  * @param data data being refilled to the original requestor
  * @param wmask per-byte write mask for this beat
  * @param alloc hint whether the block should be allocated in intervening caches
  */
object Put {
  def apply(
        client_xact_id: UInt,
        addr_block: UInt,
        addr_beat: UInt,
        data: UInt,
        wmask: Option[UInt]= None,
        alloc: Bool = Bool(true))
        (implicit p: Parameters): Acquire = {
    // An omitted mask means the whole beat is written.
    val byteMask = wmask.getOrElse(Acquire.fullWriteMask)
    BuiltInAcquireBuilder(
      a_type = Acquire.putType,
      client_xact_id = client_xact_id,
      addr_block = addr_block,
      addr_beat = addr_beat,
      data = data,
      wmask = byteMask,
      alloc = alloc)
  }
}
|
||||
|
||||
/** Put a whole cache block of data into the outer memory hierarchy
  *
  * If the write mask is not full, the block will be allocated in the
  * next-outermost level of the hierarchy. If the write mask is full, the
  * client can hint whether the block should be allocated or not.
  *
  * @param client_xact_id client's transaction id
  * @param addr_block address of the cache block
  * @param addr_beat sub-block address (which beat of several)
  * @param data data being refilled to the original requestor
  * @param wmask per-byte write mask for this beat
  * @param alloc hint whether the block should be allocated in intervening caches
  */
object PutBlock {
  def apply(
        client_xact_id: UInt,
        addr_block: UInt,
        addr_beat: UInt,
        data: UInt,
        wmask: Option[UInt] = None,
        alloc: Bool = Bool(true))
        (implicit p: Parameters): Acquire = {
    // An omitted mask means every byte of the beat is written.
    val byteMask = wmask.getOrElse(Acquire.fullWriteMask)
    BuiltInAcquireBuilder(
      a_type = Acquire.putBlockType,
      addr_block = addr_block,
      addr_beat = addr_beat,
      client_xact_id = client_xact_id,
      data = data,
      wmask = byteMask,
      alloc = alloc)
  }
}
|
||||
|
||||
/** Prefetch a cache block into the next-outermost level of the memory hierarchy
  * with write permissions.
  *
  * @param client_xact_id client's transaction id
  * @param addr_block address of the cache block
  */
object PutPrefetch {
  def apply(
        client_xact_id: UInt,
        addr_block: UInt)
        (implicit p: Parameters): Acquire =
    // Prefetches carry no data or subfields beyond the block address.
    BuiltInAcquireBuilder(
      addr_block = addr_block,
      client_xact_id = client_xact_id,
      a_type = Acquire.putPrefetchType)
}
|
||||
|
||||
/** Perform an atomic memory operation in the next-outermost level of the memory hierarchy
  *
  * @param client_xact_id client's transaction id
  * @param addr_block address of the cache block
  * @param addr_beat sub-block address (within which beat)
  * @param addr_byte sub-block address (which byte)
  * @param atomic_opcode {swap, add, xor, and, min, max, minu, maxu} from [[uncore.MemoryOpConstants]]
  * @param operand_size {byte, half, word, double} from [[uncore.MemoryOpConstants]]
  * @param data source operand data
  */
object PutAtomic {
  def apply(
        client_xact_id: UInt,
        addr_block: UInt,
        addr_beat: UInt,
        addr_byte: UInt,
        atomic_opcode: UInt,
        operand_size: UInt,
        data: UInt)
        (implicit p: Parameters): Acquire =
    // The atomic opcode, operand size and byte address travel in the union.
    BuiltInAcquireBuilder(
      a_type = Acquire.putAtomicType,
      client_xact_id = client_xact_id,
      addr_block = addr_block,
      addr_beat = addr_beat,
      addr_byte = addr_byte,
      operand_size = operand_size,
      opcode = atomic_opcode,
      data = data)
}
|
||||
|
||||
/** The Probe channel is used to force clients to release data or cede permissions
  * on a cache block. Clients respond to Probes with [[uncore.Release]] messages.
  * The available types of Probes are customized by a particular
  * [[uncore.CoherencePolicy]].
  */
class Probe(implicit p: Parameters) extends ManagerToClientChannel
  with HasCacheBlockAddress
  with HasProbeType

/** [[uncore.Probe]] with an extra field stating its destination id */
class ProbeToDst(implicit p: Parameters) extends Probe()(p) with HasClientId
|
||||
|
||||
/** Contains factories for [[uncore.Probe]] and [[uncore.ProbeToDst]]
  *
  * In general you should avoid using these factories directly and use
  * [[uncore.ManagerMetadata.makeProbe(UInt,Acquire)* makeProbe]] instead.
  *
  * @param dst id of client to which probe should be sent
  * @param p_type custom probe type
  * @param addr_block address of the cache block
  */
object Probe {
  // Probe without an explicit destination (routing handled elsewhere).
  def apply(p_type: UInt, addr_block: UInt)(implicit p: Parameters): Probe = {
    val probe = Wire(new Probe)
    probe.addr_block := addr_block
    probe.p_type := p_type
    probe
  }
  // Probe routed to a particular client.
  def apply(dst: UInt, p_type: UInt, addr_block: UInt)(implicit p: Parameters): ProbeToDst = {
    val probe = Wire(new ProbeToDst)
    probe.addr_block := addr_block
    probe.p_type := p_type
    probe.client_id := dst
    probe
  }
}
|
||||
|
||||
/** The Release channel is used to release data or permission back to the manager
  * in response to [[uncore.Probe]] messages. It can also be used to voluntarily
  * write back data, for example in the event that dirty data must be evicted on
  * a cache miss. The available types of Release messages are always customized by
  * a particular [[uncore.CoherencePolicy]]. Releases may contain data or may be
  * simple acknowledgements. Voluntary Releases are acknowledged with [[uncore.Grant Grants]].
  */
class ReleaseMetadata(implicit p: Parameters) extends ClientToManagerChannel
    with HasTileLinkBeatId
    with HasCacheBlockAddress
    with HasClientTransactionId
    with HasReleaseType {
  // Releases are always block/beat aligned, so the byte offset is zero.
  def full_addr(dummy: Int = 0) = Cat(this.addr_block, this.addr_beat, UInt(0, width = tlByteAddrBits))
}

/** [[uncore.ReleaseMetadata]] with an extra field containing the data beat */
class Release(implicit p: Parameters) extends ReleaseMetadata
  with HasTileLinkData

/** [[uncore.ReleaseMetadata]] with an extra field containing the entire cache block */
class BufferedRelease(implicit p: Parameters) extends ReleaseMetadata
  with HasTileLinkBlock

/** [[uncore.Release]] with an extra field stating its source id */
class ReleaseFromSrc(implicit p: Parameters) extends Release
  with HasClientId

/** [[uncore.BufferedRelease]] with an extra field stating its source id */
class BufferedReleaseFromSrc(implicit p: Parameters) extends BufferedRelease
  with HasClientId
|
||||
|
||||
/** Contains a [[uncore.Release]] factory
  *
  * In general you should avoid using this factory directly and use
  * [[uncore.ClientMetadata.makeRelease]] instead.
  *
  * @param voluntary is this a voluntary writeback
  * @param r_type type enum defined by coherence protocol
  * @param client_xact_id client's transaction id
  * @param addr_block address of the cache block
  * @param addr_beat beat id of the data
  * @param data data being written back
  */
object Release {
  def apply(
        voluntary: Bool,
        r_type: UInt,
        client_xact_id: UInt,
        addr_block: UInt,
        addr_beat: UInt,
        data: UInt)
        (implicit p: Parameters): Release = {
    val release = Wire(new Release)
    release.voluntary := voluntary
    release.r_type := r_type
    release.client_xact_id := client_xact_id
    release.addr_block := addr_block
    release.addr_beat := addr_beat
    release.data := data
    release
  }

  // Variant carrying the source client's id.
  def apply(
        src: UInt,
        voluntary: Bool,
        r_type: UInt,
        client_xact_id: UInt,
        addr_block: UInt,
        addr_beat: UInt = UInt(0),
        data: UInt = UInt(0))
        (implicit p: Parameters): ReleaseFromSrc = {
    val release = Wire(new ReleaseFromSrc)
    release.voluntary := voluntary
    release.r_type := r_type
    release.client_xact_id := client_xact_id
    release.addr_block := addr_block
    release.addr_beat := addr_beat
    release.data := data
    release.client_id := src
    release
  }
}
|
||||
|
||||
/** The Grant channel is used to refill data or grant permissions requested of the
  * manager agent via an [[uncore.Acquire]] message. It is also used to acknowledge
  * the receipt of voluntary writeback from clients in the form of [[uncore.Release]]
  * messages. There are built-in Grant messages used for Gets and Puts, and
  * coherence policies may also define custom Grant types. Grants may contain data
  * or may be simple acknowledgements. Grants are responded to with [[uncore.Finish]].
  */
class GrantMetadata(implicit p: Parameters) extends ManagerToClientChannel
    with HasTileLinkBeatId
    with HasClientTransactionId
    with HasManagerTransactionId
    with HasGrantType {
  /** Construct the [[uncore.Finish]] acknowledgement for this Grant. */
  def makeFinish(dummy: Int = 0): Finish = {
    val f = Wire(new Finish)
    f.manager_xact_id := this.manager_xact_id
    f
  }
}
|
||||
|
||||
/** [[uncore.GrantMetadata]] with an extra field containing a single beat of data */
|
||||
class Grant(implicit p: Parameters) extends GrantMetadata
|
||||
with HasTileLinkData
|
||||
|
||||
/** [[uncore.Grant]] with an extra field stating its destination */
|
||||
class GrantToDst(implicit p: Parameters) extends Grant
|
||||
with HasClientId
|
||||
|
||||
/** [[uncore.Grant]] with an extra field identifying the manager that sent it
  * (its source), so the receiving client can address the responding
  * [[uncore.FinishToDst]] back to that manager.
  */
class GrantFromSrc(implicit p: Parameters) extends Grant
    with HasManagerId {
  /** Construct a [[uncore.FinishToDst]] acknowledging this Grant, routed back
    * to the manager recorded in manager_id.
    */
  override def makeFinish(dummy: Int = 0): FinishToDst = {
    val f = Wire(new FinishToDst)
    f.manager_xact_id := this.manager_xact_id
    f.manager_id := this.manager_id
    f
  }
}
|
||||
|
||||
/** [[uncore.GrantMetadata]] with an extra field containing an entire cache block */
class BufferedGrant(implicit p: Parameters) extends GrantMetadata
    with HasTileLinkBlock
|
||||
|
||||
/** [[uncore.BufferedGrant]] with an extra field stating its destination client id */
class BufferedGrantToDst(implicit p: Parameters) extends BufferedGrant
    with HasClientId
|
||||
|
||||
/** Contains definitions of the built-in grant types and factories
  * for [[uncore.Grant]] and [[uncore.GrantToDst]]
  *
  * In general you should avoid using these factories directly and use
  * [[uncore.ManagerMetadata.makeGrant(uncore.AcquireFromSrc* makeGrant]] instead.
  *
  * @param dst id of client to which grant should be sent
  * @param is_builtin_type built-in or custom type message?
  * @param g_type built-in type enum or custom type enum
  * @param client_xact_id client's transaction id
  * @param manager_xact_id manager's transaction id
  * @param addr_beat beat id of the data
  * @param data data being refilled to the original requestor
  */
object Grant {
  val nBuiltInTypes = 5
  // Built-in grant type encodings; bit 2 distinguishes data-carrying Gets.
  def voluntaryAckType = UInt("b000") // For acking Releases
  def prefetchAckType  = UInt("b001") // For acking any kind of Prefetch
  def putAckType       = UInt("b011") // For acking any kind of non-prefetch Put
  def getDataBeatType  = UInt("b100") // Supplying a single beat of Get
  def getDataBlockType = UInt("b101") // Supplying all beats of a GetBlock
  // Lookup vectors used to test whether a g_type carries (multibeat) data.
  def typesWithData = Vec(getDataBlockType, getDataBeatType)
  def typesWithMultibeatData = Vec(getDataBlockType)

  /** Factory for a plain [[uncore.Grant]] (no destination header field). */
  def apply(
      is_builtin_type: Bool,
      g_type: UInt,
      client_xact_id: UInt,
      manager_xact_id: UInt,
      addr_beat: UInt,
      data: UInt)
      (implicit p: Parameters): Grant = {
    val gnt = Wire(new Grant)
    gnt.is_builtin_type := is_builtin_type
    gnt.g_type := g_type
    gnt.client_xact_id := client_xact_id
    gnt.manager_xact_id := manager_xact_id
    gnt.addr_beat := addr_beat
    gnt.data := data
    gnt
  }

  /** Factory for a [[uncore.GrantToDst]], additionally recording the
    * destination client id in client_id.
    */
  def apply(
      dst: UInt,
      is_builtin_type: Bool,
      g_type: UInt,
      client_xact_id: UInt,
      manager_xact_id: UInt,
      addr_beat: UInt = UInt(0),
      data: UInt = UInt(0))
      (implicit p: Parameters): GrantToDst = {
    val gnt = Wire(new GrantToDst)
    gnt.client_id := dst
    gnt.is_builtin_type := is_builtin_type
    gnt.g_type := g_type
    gnt.client_xact_id := client_xact_id
    gnt.manager_xact_id := manager_xact_id
    gnt.addr_beat := addr_beat
    gnt.data := data
    gnt
  }
}
|
||||
|
||||
/** The Finish channel is used to provide a global ordering of transactions
  * in networks that do not guarantee point-to-point ordering of messages.
  * A Finish message is sent as acknowledgement of receipt of a [[uncore.Grant]].
  * When a Finish message is received, a manager knows it is safe to begin
  * processing other transactions that touch the same cache block.
  */
class Finish(implicit p: Parameters) extends ClientToManagerChannel()(p)
    with HasManagerTransactionId {
  // Finish messages never carry data.
  def hasData(dummy: Int = 0) = Bool(false)
  def hasMultibeatData(dummy: Int = 0) = Bool(false)
}
|
||||
|
||||
/** [[uncore.Finish]] with an extra field stating its destination manager id */
class FinishToDst(implicit p: Parameters) extends Finish
    with HasManagerId
|
||||
|
||||
/** Complete IO definition for incoherent TileLink, including networking headers.
  * Only the three channels needed by a cacheless client: Acquire out,
  * Grant in, Finish out.
  */
class UncachedTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
  val acquire = new DecoupledIO(new LogicalNetworkIO(new Acquire))
  val grant   = new DecoupledIO(new LogicalNetworkIO(new Grant)).flip
  val finish  = new DecoupledIO(new LogicalNetworkIO(new Finish))
}
|
||||
|
||||
/** Complete IO definition for coherent TileLink, including networking headers.
  * Adds the Probe (in) and Release (out) channels to [[uncore.UncachedTileLinkIO]].
  */
class TileLinkIO(implicit p: Parameters) extends UncachedTileLinkIO()(p) {
  val probe   = new DecoupledIO(new LogicalNetworkIO(new Probe)).flip
  val release = new DecoupledIO(new LogicalNetworkIO(new Release))
}
|
||||
|
||||
/** This version of UncachedTileLinkIO does not contain network headers.
  * It is intended for use within client agents.
  *
  * Headers are provided in the top-level that instantiates the clients and network,
  * probably using a [[uncore.ClientTileLinkNetworkPort]] module.
  * By eliding the header subbundles within the clients we can enable
  * hierarchical P-and-R while minimizing unconnected port errors in GDS.
  *
  * Secondly, this version of the interface elides [[uncore.Finish]] messages, with the
  * assumption that a [[uncore.FinishUnit]] has been coupled to the TileLinkIO port
  * to deal with acking received [[uncore.Grant Grants]].
  */
class ClientUncachedTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
  val acquire = new DecoupledIO(new Acquire)
  val grant   = new DecoupledIO(new Grant).flip
}
|
||||
|
||||
/** This version of TileLinkIO does not contain network headers.
  * It is intended for use within client agents.
  * Grants arrive as [[uncore.GrantFromSrc]] (carrying the manager id) and are
  * acked with [[uncore.FinishToDst]] (addressed back to that manager).
  */
class ClientTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
  val acquire = new DecoupledIO(new Acquire)
  val probe   = new DecoupledIO(new Probe).flip
  val release = new DecoupledIO(new Release)
  val grant   = new DecoupledIO(new GrantFromSrc).flip
  val finish  = new DecoupledIO(new FinishToDst)
}
|
||||
|
||||
/** This version of TileLinkIO does not contain network headers, but
  * every channel does include an extra client_id subbundle.
  * It is intended for use within Management agents.
  *
  * Managers need to track where [[uncore.Acquire]] and [[uncore.Release]] messages
  * originated so that they can send a [[uncore.Grant]] to the right place.
  * Similarly they must be able to issue Probes to particular clients.
  * However, we'd still prefer to have [[uncore.ManagerTileLinkNetworkPort]] fill in
  * the header.src to enable hierarchical p-and-r of the managers. Additionally,
  * coherent clients might be mapped to random network port ids, and we'll leave it to the
  * [[uncore.ManagerTileLinkNetworkPort]] to apply the correct mapping. Managers do need to
  * see Finish messages so they know when to allow new transactions on a cache
  * block to proceed.
  */
class ManagerTileLinkIO(implicit p: Parameters) extends TLBundle()(p) {
  val acquire = new DecoupledIO(new AcquireFromSrc).flip
  val grant   = new DecoupledIO(new GrantToDst)
  val finish  = new DecoupledIO(new Finish).flip
  val probe   = new DecoupledIO(new ProbeToDst)
  val release = new DecoupledIO(new ReleaseFromSrc).flip
}
|
||||
386
src/main/scala/uncore/tilelink/Interconnect.scala
Normal file
386
src/main/scala/uncore/tilelink/Interconnect.scala
Normal file
@@ -0,0 +1,386 @@
|
||||
package uncore.tilelink
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import scala.collection.mutable.ArraySeq
|
||||
import uncore.util._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
|
||||
/** PortedTileLinkNetworks combine a TileLink protocol with a particular physical
  * network implementation.
  *
  * Specifically, they provide mappings between ClientTileLinkIO/
  * ManagerTileLinkIO channels and LogicalNetwork ports (i.e. generic
  * TileLinkIO with networking headers). Channels coming into the network have
  * appropriate networking headers appended and outgoing channels have their
  * headers stripped.
  *
  * @constructor base class constructor for Ported TileLink NoC
  * @param addrToManagerId a mapping from a physical address to the network
  *        id of a coherence manager
  * @param sharerToClientId a mapping from the id of a particular coherent
  *        client (as determined by e.g. the directory) and the network id
  *        of that client
  * @param clientDepths the depths of the queue that should be used to buffer
  *        each channel on the client side of the network
  * @param managerDepths the depths of the queue that should be used to buffer
  *        each channel on the manager side of the network
  */
abstract class PortedTileLinkNetwork(
    addrToManagerId: UInt => UInt,
    sharerToClientId: UInt => UInt,
    clientDepths: TileLinkDepths,
    managerDepths: TileLinkDepths)
    (implicit p: Parameters) extends TLModule()(p) {
  val nClients = tlNClients
  val nManagers = tlNManagers
  val io = new Bundle {
    val clients_cached = Vec(tlNCachingClients, new ClientTileLinkIO).flip
    val clients_uncached = Vec(tlNCachelessClients, new ClientUncachedTileLinkIO).flip
    val managers = Vec(nManagers, new ManagerTileLinkIO).flip
  }

  // Wrap each client port (cached then uncached, in that index order) with a
  // network-port header shim and a buffering queue; the collected values are
  // the header-bearing TileLinkIOs on the network side of each queue.
  val clients = (io.clients_cached ++ io.clients_uncached).zipWithIndex.map {
    case (io, idx) => {
      val qs = Module(new TileLinkEnqueuer(clientDepths))
      // Dispatch on the concrete port type to pick the matching shim.
      io match {
        case c: ClientTileLinkIO => {
          val port = Module(new ClientTileLinkNetworkPort(idx, addrToManagerId))
          port.io.client <> c
          qs.io.client <> port.io.network
          qs.io.manager
        }
        case u: ClientUncachedTileLinkIO => {
          val port = Module(new ClientUncachedTileLinkNetworkPort(idx, addrToManagerId))
          port.io.client <> u
          qs.io.client <> port.io.network
          qs.io.manager
        }
      }
    }
  }

  // Symmetrically wrap each manager port; collected values are the
  // header-bearing TileLinkIOs on the network side of each manager queue.
  val managers = io.managers.zipWithIndex.map {
    case (m, i) => {
      val port = Module(new ManagerTileLinkNetworkPort(i, sharerToClientId))
      val qs = Module(new TileLinkEnqueuer(managerDepths))
      port.io.manager <> m
      port.io.network <> qs.io.manager
      qs.io.client
    }
  }
}
|
||||
|
||||
/** A simple arbiter for each channel that also deals with header-based routing.
  * Assumes a single manager agent. */
class PortedTileLinkArbiter(
    sharerToClientId: UInt => UInt = (u: UInt) => u,
    clientDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0),
    managerDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0))
    (implicit p: Parameters)
    extends PortedTileLinkNetwork(u => UInt(0), sharerToClientId, clientDepths, managerDepths)(p)
    with TileLinkArbiterLike
    with PassesId {
  val arbN = nClients
  require(nManagers == 1)
  if(arbN > 1) {
    // Client-sourced channels are arbitrated into the single manager;
    // manager-sourced channels are fanned out using their dst headers.
    hookupClientSource(clients.map(_.acquire), managers.head.acquire)
    hookupClientSource(clients.map(_.release), managers.head.release)
    hookupFinish(clients.map(_.finish), managers.head.finish)
    hookupManagerSourceWithHeader(clients.map(_.probe), managers.head.probe)
    hookupManagerSourceWithHeader(clients.map(_.grant), managers.head.grant)
  } else {
    // Degenerate 1-to-1 case: connect the ports directly.
    managers.head <> clients.head
  }
}
|
||||
|
||||
/** Provides a separate physical crossbar for each channel. Assumes multiple manager
  * agents. Managers are assigned to higher physical network port ids than
  * clients, and translations between logical network id and physical crossbar
  * port id are done automatically.
  */
class PortedTileLinkCrossbar(
    addrToManagerId: UInt => UInt = u => UInt(0),
    sharerToClientId: UInt => UInt = u => u,
    clientDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0),
    managerDepths: TileLinkDepths = TileLinkDepths(0,0,0,0,0))
    (implicit p: Parameters)
    extends PortedTileLinkNetwork(addrToManagerId, sharerToClientId, clientDepths, managerDepths)(p) {
  val n = p(LNEndpoints)
  val phyHdrWidth = log2Up(n)
  val count = tlDataBeats
  // Actually instantiate the particular networks required for TileLink.
  // Data-bearing channels (Acquire/Release/Grant) lock the bus for all beats
  // of a multibeat message; Probe and Finish are single-beat.
  val acqNet = Module(new BasicBus(CrossbarConfig(n, new Acquire, count, Some((a: PhysicalNetworkIO[Acquire]) => a.payload.hasMultibeatData()))))
  val relNet = Module(new BasicBus(CrossbarConfig(n, new Release, count, Some((r: PhysicalNetworkIO[Release]) => r.payload.hasMultibeatData()))))
  val prbNet = Module(new BasicBus(CrossbarConfig(n, new Probe)))
  val gntNet = Module(new BasicBus(CrossbarConfig(n, new Grant, count, Some((g: PhysicalNetworkIO[Grant]) => g.payload.hasMultibeatData()))))
  val ackNet = Module(new BasicBus(CrossbarConfig(n, new Finish)))

  // Aliases for the various network IO bundle types
  type PNIO[T <: Data] = DecoupledIO[PhysicalNetworkIO[T]]
  type LNIO[T <: Data] = DecoupledIO[LogicalNetworkIO[T]]
  type FromCrossbar[T <: Data] = PNIO[T] => LNIO[T]
  type ToCrossbar[T <: Data] = LNIO[T] => PNIO[T]

  // Shims for converting between logical network IOs and physical network IOs.
  // Physical port ids place managers in [0, nManagers) and clients in
  // [nManagers, n), so logical<->physical translation adds/subtracts nManagers.
  def crossbarToManagerShim[T <: Data](in: PNIO[T]): LNIO[T] = {
    val out = DefaultFromPhysicalShim(in)
    out.bits.header.src := in.bits.header.src - UInt(nManagers)
    out
  }
  def crossbarToClientShim[T <: Data](in: PNIO[T]): LNIO[T] = {
    val out = DefaultFromPhysicalShim(in)
    out.bits.header.dst := in.bits.header.dst - UInt(nManagers)
    out
  }
  def managerToCrossbarShim[T <: Data](in: LNIO[T]): PNIO[T] = {
    val out = DefaultToPhysicalShim(n, in)
    out.bits.header.dst := in.bits.header.dst + UInt(nManagers, phyHdrWidth)
    out
  }
  def clientToCrossbarShim[T <: Data](in: LNIO[T]): PNIO[T] = {
    val out = DefaultToPhysicalShim(n, in)
    out.bits.header.src := in.bits.header.src + UInt(nManagers, phyHdrWidth)
    out
  }

  // Make an individual connection between virtual and physical ports using
  // a particular shim. Also pin the unused Decoupled control signal low.
  def doDecoupledInputHookup[T <: Data](phys_in: PNIO[T], phys_out: PNIO[T], log_io: LNIO[T], shim: ToCrossbar[T]) = {
    val s = shim(log_io)
    phys_in.valid := s.valid
    phys_in.bits := s.bits
    s.ready := phys_in.ready
    phys_out.ready := Bool(false)
  }

  def doDecoupledOutputHookup[T <: Data](phys_in: PNIO[T], phys_out: PNIO[T], log_io: LNIO[T], shim: FromCrossbar[T]) = {
    val s = shim(phys_out)
    log_io.valid := s.valid
    log_io.bits := s.bits
    s.ready := log_io.ready
    phys_in.valid := Bool(false)
  }

  // Hookup all instances of a particular subbundle of TileLink. The payload's
  // channel direction (client->manager vs manager->client) determines which
  // side is an input to the crossbar and which is an output.
  def doDecoupledHookups[T <: Data](physIO: BasicCrossbarIO[T], getLogIO: TileLinkIO => LNIO[T]) = {
    physIO.in.head.bits.payload match {
      case c: ClientToManagerChannel => {
        managers.zipWithIndex.map { case (i, id) =>
          doDecoupledOutputHookup(physIO.in(id), physIO.out(id), getLogIO(i), crossbarToManagerShim[T])
        }
        clients.zipWithIndex.map { case (i, id) =>
          doDecoupledInputHookup(physIO.in(id+nManagers), physIO.out(id+nManagers), getLogIO(i), clientToCrossbarShim[T])
        }
      }
      case m: ManagerToClientChannel => {
        managers.zipWithIndex.map { case (i, id) =>
          doDecoupledInputHookup(physIO.in(id), physIO.out(id), getLogIO(i), managerToCrossbarShim[T])
        }
        clients.zipWithIndex.map { case (i, id) =>
          doDecoupledOutputHookup(physIO.in(id+nManagers), physIO.out(id+nManagers), getLogIO(i), crossbarToClientShim[T])
        }
      }
    }
  }

  doDecoupledHookups(acqNet.io, (tl: TileLinkIO) => tl.acquire)
  doDecoupledHookups(relNet.io, (tl: TileLinkIO) => tl.release)
  doDecoupledHookups(prbNet.io, (tl: TileLinkIO) => tl.probe)
  doDecoupledHookups(gntNet.io, (tl: TileLinkIO) => tl.grant)
  doDecoupledHookups(ackNet.io, (tl: TileLinkIO) => tl.finish)
}
|
||||
|
||||
/** Routes a single ClientUncachedTileLinkIO to one of nOuter outputs based on
  * the acquire's full address, and arbitrates the returning Grants back onto
  * the single inner port.
  *
  * @param nOuter number of output ports
  * @param routeSel maps an address to a one-hot vector selecting the output
  */
class ClientUncachedTileLinkIORouter(
    nOuter: Int, routeSel: UInt => UInt)(implicit p: Parameters)
    extends TLModule {

  val io = new Bundle {
    val in = (new ClientUncachedTileLinkIO).flip
    val out = Vec(nOuter, new ClientUncachedTileLinkIO)
  }

  // One-hot route selection for the current acquire.
  val acq_route = routeSel(io.in.acquire.bits.full_addr())

  // Default: not ready; overridden below by the selected output's readiness.
  io.in.acquire.ready := Bool(false)

  io.out.zipWithIndex.foreach { case (out, i) =>
    out.acquire.valid := io.in.acquire.valid && acq_route(i)
    out.acquire.bits := io.in.acquire.bits
    when (acq_route(i)) { io.in.acquire.ready := out.acquire.ready }
  }

  // Grants from all outputs are merged; the arbiter locks onto a source for
  // all beats of a multibeat Grant.
  val gnt_arb = Module(new LockingRRArbiter(
    new Grant, nOuter, tlDataBeats, Some((gnt: Grant) => gnt.hasMultibeatData())))
  gnt_arb.io.in <> io.out.map(_.grant)
  io.in.grant <> gnt_arb.io.out

  assert(!io.in.acquire.valid || acq_route.orR, "No valid route")
}
|
||||
|
||||
/** IO bundle for interconnects between nInner headerless client ports and
  * nOuter downstream ports. */
class TileLinkInterconnectIO(val nInner: Int, val nOuter: Int)
    (implicit p: Parameters) extends Bundle {
  val in = Vec(nInner, new ClientUncachedTileLinkIO).flip
  val out = Vec(nOuter, new ClientUncachedTileLinkIO)
}
|
||||
|
||||
/** Full crossbar between headerless TileLink ports: each inner port gets an
  * address router and each outer port gets an arbiter over all routers.
  * The single-input case degenerates to just a router.
  *
  * @param nInner number of inner (client) ports
  * @param nOuter number of outer (downstream) ports
  * @param routeSel maps an address to a one-hot vector selecting the outer port
  */
class ClientUncachedTileLinkIOCrossbar(
    nInner: Int, nOuter: Int, routeSel: UInt => UInt)
    (implicit p: Parameters) extends TLModule {

  val io = new TileLinkInterconnectIO(nInner, nOuter)

  if (nInner == 1) {
    val router = Module(new ClientUncachedTileLinkIORouter(nOuter, routeSel))
    router.io.in <> io.in.head
    io.out <> router.io.out
  } else {
    val routers = List.fill(nInner) {
      Module(new ClientUncachedTileLinkIORouter(nOuter, routeSel)) }
    val arbiters = List.fill(nOuter) {
      Module(new ClientUncachedTileLinkIOArbiter(nInner)) }

    for (i <- 0 until nInner) {
      routers(i).io.in <> io.in(i)
    }

    // Arbiter i merges output i of every router onto outer port i.
    for (i <- 0 until nOuter) {
      arbiters(i).io.in <> routers.map(r => r.io.out(i))
      io.out(i) <> arbiters(i).io.out
    }
  }
}
|
||||
|
||||
/** Base class for headerless TileLink interconnects; concrete subclasses
  * supply the port counts. io is lazy so subclasses can set nInner/nOuter
  * before it is elaborated. */
abstract class TileLinkInterconnect(implicit p: Parameters) extends TLModule()(p) {
  val nInner: Int
  val nOuter: Int

  lazy val io = new TileLinkInterconnectIO(nInner, nOuter)
}
|
||||
|
||||
/** Builds a tree of crossbars mirroring the structure of an AddrMap: one
  * crossbar per map level, recursing into non-collapsed submaps. Outer ports
  * appear in address-map order and can be looked up by name via port().
  *
  * @param nInner number of inner client ports
  * @param addrMap the address map to decompose
  */
class TileLinkRecursiveInterconnect(val nInner: Int, addrMap: AddrMap)
    (implicit p: Parameters) extends TileLinkInterconnect()(p) {
  def port(name: String) = io.out(addrMap.port(name))
  val nOuter = addrMap.numSlaves
  // One-hot route: bit i set when the address falls in entry i's region.
  val routeSel = (addr: UInt) =>
    Cat(addrMap.entries.map(e => addrMap(e.name).containsAddress(addr)).reverse)

  val xbar = Module(new ClientUncachedTileLinkIOCrossbar(nInner, addrMap.length, routeSel))
  xbar.io.in <> io.in

  io.out <> addrMap.entries.zip(xbar.io.out).flatMap {
    case (entry, xbarOut) => {
      entry.region match {
        case submap: AddrMap if submap.isEmpty =>
          // Empty submap: tie off the port and contribute no outer ports.
          xbarOut.acquire.ready := Bool(false)
          xbarOut.grant.valid := Bool(false)
          None
        case submap: AddrMap if !submap.collapse =>
          // Recurse into the submap; its outer ports are spliced in here.
          val ic = Module(new TileLinkRecursiveInterconnect(1, submap))
          ic.io.in.head <> xbarOut
          ic.io.out
        case _ =>
          // Leaf (or collapsed) region: expose the crossbar port directly.
          Some(xbarOut)
      }
    }
  }
}
|
||||
|
||||
/** Connects nBanks memory banks to nChannels memory channels, striding bank
  * assignments across channels and compacting the block address to account
  * for the channel-selection bits.
  *
  * @param nBanksPerChannel number of banks arbitrated onto each channel
  * @param nChannels number of memory channels
  */
class TileLinkMemoryInterconnect(
    nBanksPerChannel: Int, nChannels: Int)
    (implicit p: Parameters) extends TileLinkInterconnect()(p) {

  val nBanks = nBanksPerChannel * nChannels
  val nInner = nBanks
  val nOuter = nChannels

  // Drop the channel-index bits from the block address seen by the channel.
  def connectChannel(outer: ClientUncachedTileLinkIO, inner: ClientUncachedTileLinkIO) {
    outer <> inner
    outer.acquire.bits.addr_block := inner.acquire.bits.addr_block >> UInt(log2Ceil(nChannels))
  }

  for (i <- 0 until nChannels) {
    /* Bank assignments to channels are strided so that consecutive banks
     * map to different channels. That way, consecutive cache lines also
     * map to different channels */
    val banks = (i until nBanks by nChannels).map(j => io.in(j))

    val channelArb = Module(new ClientUncachedTileLinkIOArbiter(nBanksPerChannel))
    channelArb.io.in <> banks
    connectChannel(io.out(i), channelArb.io.out)
  }
}
|
||||
|
||||
/** Allows users to switch between various memory configurations. Note that
  * this is a dangerous operation: not only does switching the select input to
  * this module violate TileLink, it also causes the memory of the machine to
  * become garbled. It's expected that select only changes at boot time, as
  * part of the memory controller configuration. */
class TileLinkMemorySelectorIO(val nBanks: Int, val maxMemChannels: Int, nConfigs: Int)
    (implicit p: Parameters)
    extends TileLinkInterconnectIO(nBanks, maxMemChannels) {
  // Selects which of the nConfigs channel layouts is active.
  val select = UInt(INPUT, width = log2Up(nConfigs))
  override def cloneType =
    new TileLinkMemorySelectorIO(nBanks, maxMemChannels, nConfigs).asInstanceOf[this.type]
}
|
||||
|
||||
/** Instantiates one [[TileLinkMemoryInterconnect]] per candidate channel count
  * and muxes between them based on io.select. All ports are first tied off to
  * defaults; the active configuration's connections are then overridden via
  * last-connect-wins `when` blocks.
  *
  * @param nBanks number of inner bank ports
  * @param maxMemChannels maximum number of outer channels across all configs
  * @param configs the candidate channel counts, indexed by select value
  */
class TileLinkMemorySelector(nBanks: Int, maxMemChannels: Int, configs: Seq[Int])
    (implicit p: Parameters)
    extends TileLinkInterconnect()(p) {
  val nInner = nBanks
  val nOuter = maxMemChannels
  val nConfigs = configs.size

  override lazy val io = new TileLinkMemorySelectorIO(nBanks, maxMemChannels, nConfigs)

  // Conditionally forward one Decoupled channel only while `active` is high.
  def muxOnSelect[T <: Data](up: DecoupledIO[T], dn: DecoupledIO[T], active: Bool): Unit = {
    when (active) { dn.bits := up.bits }
    when (active) { up.ready := dn.ready }
    when (active) { dn.valid := up.valid }
  }

  def muxOnSelect(up: ClientUncachedTileLinkIO, dn: ClientUncachedTileLinkIO, active: Bool): Unit = {
    muxOnSelect(up.acquire, dn.acquire, active)
    muxOnSelect(dn.grant, up.grant, active)
  }

  def muxOnSelect(up: Vec[ClientUncachedTileLinkIO], dn: Vec[ClientUncachedTileLinkIO], active: Bool) : Unit = {
    for (i <- 0 until up.size)
      muxOnSelect(up(i), dn(i), active)
  }

  /* Disconnects a vector of TileLink ports, which involves setting them to
   * invalid. Due to Chisel reasons, we need to also set the bits to 0 (since
   * there can't be any unconnected inputs). */
  def disconnectOuter(outer: Vec[ClientUncachedTileLinkIO]) = {
    outer.foreach{ m =>
      m.acquire.valid := Bool(false)
      m.acquire.bits := m.acquire.bits.fromBits(UInt(0))
      m.grant.ready := Bool(false)
    }
  }

  def disconnectInner(inner: Vec[ClientUncachedTileLinkIO]) = {
    inner.foreach { m =>
      m.grant.valid := Bool(false)
      m.grant.bits := m.grant.bits.fromBits(UInt(0))
      m.acquire.ready := Bool(false)
    }
  }

  /* Provides default wires on all our outputs. */
  disconnectOuter(io.out)
  disconnectInner(io.in)

  /* Constructs interconnects for each of the layouts suggested by the
   * configuration and switches between them based on the select input. */
  configs.zipWithIndex.foreach{ case (nChannels, select) =>
    val nBanksPerChannel = nBanks / nChannels
    val ic = Module(new TileLinkMemoryInterconnect(nBanksPerChannel, nChannels))
    disconnectInner(ic.io.out)
    disconnectOuter(ic.io.in)
    muxOnSelect(io.in, ic.io.in, io.select === UInt(select))
    muxOnSelect(ic.io.out, io.out, io.select === UInt(select))
  }
}
|
||||
308
src/main/scala/uncore/tilelink/Network.scala
Normal file
308
src/main/scala/uncore/tilelink/Network.scala
Normal file
@@ -0,0 +1,308 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.tilelink
|
||||
|
||||
import Chisel._
|
||||
import uncore.util._
|
||||
import cde.{Parameters, Field}
|
||||
|
||||
case object LNEndpoints extends Field[Int]
|
||||
case object LNHeaderBits extends Field[Int]
|
||||
|
||||
/** Source/destination port ids for a physical network with n endpoints. */
class PhysicalHeader(n: Int) extends Bundle {
  val src = UInt(width = log2Up(n))
  val dst = UInt(width = log2Up(n))
}
|
||||
|
||||
/** A payload of type T carried with a [[PhysicalHeader]] for an n-port network. */
class PhysicalNetworkIO[T <: Data](n: Int, dType: T) extends Bundle {
  val header = new PhysicalHeader(n)
  val payload = dType.cloneType
  override def cloneType = new PhysicalNetworkIO(n,dType).asInstanceOf[this.type]
}
|
||||
|
||||
/** n symmetric input and output ports carrying headered payloads of type T. */
class BasicCrossbarIO[T <: Data](n: Int, dType: T) extends Bundle {
  val in = Vec(n, Decoupled(new PhysicalNetworkIO(n,dType))).flip
  val out = Vec(n, Decoupled(new PhysicalNetworkIO(n,dType)))
}
|
||||
|
||||
/** Marker base class for physical network implementations. */
abstract class PhysicalNetwork extends Module
|
||||
|
||||
/** Crossbar parameters: port count n, payload type, beats per locked transfer,
  * and an optional predicate deciding when the arbiter must lock a source. */
case class CrossbarConfig[T <: Data](n: Int, dType: T, count: Int = 1, needsLock: Option[PhysicalNetworkIO[T] => Bool] = None)
|
||||
|
||||
/** Common IO for crossbar implementations parameterized by a [[CrossbarConfig]]. */
abstract class AbstractCrossbar[T <: Data](conf: CrossbarConfig[T]) extends PhysicalNetwork {
  val io = new BasicCrossbarIO(conf.n, conf.dType)
}
|
||||
|
||||
/** Shared-bus network: a single locking round-robin arbiter picks one input,
  * and its message is steered to the output named by header.dst. */
class BasicBus[T <: Data](conf: CrossbarConfig[T]) extends AbstractCrossbar(conf) {
  val arb = Module(new LockingRRArbiter(io.in(0).bits, conf.n, conf.count, conf.needsLock))
  arb.io.in <> io.in

  // Backpressure comes from the destination port only.
  arb.io.out.ready := io.out(arb.io.out.bits.header.dst).ready
  for ((out, i) <- io.out zipWithIndex) {
    out.valid := arb.io.out.valid && arb.io.out.bits.header.dst === UInt(i)
    out.bits := arb.io.out.bits
  }
}
|
||||
|
||||
/** True crossbar: one locking round-robin arbiter per output port, each
  * arbitrating among the inputs whose header.dst names that output. */
class BasicCrossbar[T <: Data](conf: CrossbarConfig[T]) extends AbstractCrossbar(conf) {
  // Default: inputs stalled; overridden below when a destination accepts.
  io.in.foreach { _.ready := Bool(false) }

  io.out.zipWithIndex.map{ case (out, i) => {
    val rrarb = Module(new LockingRRArbiter(io.in(0).bits, conf.n, conf.count, conf.needsLock))
    (rrarb.io.in, io.in).zipped.map{ case (arb, in) => {
      val destined = in.bits.header.dst === UInt(i)
      arb.valid := in.valid && destined
      arb.bits := in.bits
      when (arb.ready && destined) { in.ready := Bool(true) }
    }}
    out <> rrarb.io.out
  }}
}
|
||||
|
||||
/** Marker base class for logical network implementations. */
abstract class LogicalNetwork extends Module
|
||||
|
||||
/** Source/destination logical endpoint ids, sized by the LNHeaderBits parameter. */
class LogicalHeader(implicit p: Parameters) extends junctions.ParameterizedBundle()(p) {
  val src = UInt(width = p(LNHeaderBits))
  val dst = UInt(width = p(LNHeaderBits))
}
|
||||
|
||||
/** A payload of type T carried with a [[LogicalHeader]]. */
class LogicalNetworkIO[T <: Data](dType: T)(implicit p: Parameters) extends Bundle {
  val header = new LogicalHeader
  val payload = dType.cloneType
  override def cloneType = new LogicalNetworkIO(dType)(p).asInstanceOf[this.type]
}
|
||||
|
||||
/** Attaches a [[LogicalHeader]] to a headerless Decoupled channel, producing
  * a header-bearing channel with the given src and dst ids. Valid/ready are
  * passed straight through.
  */
object DecoupledLogicalNetworkIOWrapper {
  def apply[T <: Data](
      in: DecoupledIO[T],
      src: UInt = UInt(0),
      dst: UInt = UInt(0))
      (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
    val wrapped = Wire(Decoupled(new LogicalNetworkIO(in.bits)))
    // Fill in the routing header, then forward payload and handshake.
    wrapped.bits.header.src := src
    wrapped.bits.header.dst := dst
    wrapped.bits.payload := in.bits
    wrapped.valid := in.valid
    in.ready := wrapped.ready
    wrapped
  }
}
|
||||
|
||||
/** Strips the [[LogicalHeader]] from a header-bearing Decoupled channel,
  * leaving only the payload. Valid/ready are passed straight through.
  */
object DecoupledLogicalNetworkIOUnwrapper {
  def apply[T <: Data](in: DecoupledIO[LogicalNetworkIO[T]])
      (implicit p: Parameters): DecoupledIO[T] = {
    val stripped = Wire(Decoupled(in.bits.payload))
    // Forward the payload and the handshake; the header is simply dropped.
    stripped.bits := in.bits.payload
    stripped.valid := in.valid
    in.ready := stripped.ready
    stripped
  }
}
|
||||
|
||||
/** Converts a physical-network channel to a logical-network channel by
  * copying header, payload, and handshake verbatim. */
object DefaultFromPhysicalShim {
  def apply[T <: Data](in: DecoupledIO[PhysicalNetworkIO[T]])
      (implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
    val out = Wire(Decoupled(new LogicalNetworkIO(in.bits.payload)))
    out.bits.header := in.bits.header
    out.bits.payload := in.bits.payload
    out.valid := in.valid
    in.ready := out.ready
    out
  }
}
|
||||
|
||||
/** Converts a logical-network channel to an n-port physical-network channel
  * by copying header, payload, and handshake verbatim. */
object DefaultToPhysicalShim {
  def apply[T <: Data](n: Int, in: DecoupledIO[LogicalNetworkIO[T]])
      (implicit p: Parameters): DecoupledIO[PhysicalNetworkIO[T]] = {
    val out = Wire(Decoupled(new PhysicalNetworkIO(n, in.bits.payload)))
    out.bits.header := in.bits.header
    out.bits.payload := in.bits.payload
    out.valid := in.valid
    in.ready := out.ready
    out
  }
}
|
||||
|
||||
/** A helper module that automatically issues [[uncore.Finish]] messages in response
  * to [[uncore.Grant]] that it receives from a manager and forwards to a client
  */
class FinishUnit(srcId: Int = 0, outstanding: Int = 2)(implicit p: Parameters) extends TLModule()(p)
    with HasDataBeatCounters {
  val io = new Bundle {
    val grant = Decoupled(new LogicalNetworkIO(new Grant)).flip
    val refill = Decoupled(new Grant)
    val finish = Decoupled(new LogicalNetworkIO(new Finish))
    val ready = Bool(OUTPUT)
  }

  val g = io.grant.bits.payload

  if(tlNetworkPreservesPointToPointOrdering) {
    // Ordered network: no Finish messages are needed at all; just pass
    // Grants straight through to the refill port.
    io.finish.valid := Bool(false)
    io.refill.valid := io.grant.valid
    io.refill.bits := g
    io.grant.ready := io.refill.ready
    io.ready := Bool(true)
  } else {
    // We only want to send Finishes after we have collected all beats of
    // a multibeat Grant. But Grants from multiple managers or transactions may
    // get interleaved, so we could need a counter for each.
    val done = if(tlNetworkDoesNotInterleaveBeats) {
      connectIncomingDataBeatCounterWithHeader(io.grant)
    } else {
      val entries = 1 << tlClientXactIdBits
      def getId(g: LogicalNetworkIO[Grant]) = g.payload.client_xact_id
      // NOTE(review): since entries = 1 << tlClientXactIdBits and the id is
      // tlClientXactIdBits wide, `<=` here looks vacuously true — presumably
      // `<` was intended; confirm against the counter array size.
      assert(getId(io.grant.bits) <= UInt(entries), "Not enough grant beat counters, only " + entries + " entries.")
      connectIncomingDataBeatCountersWithHeader(io.grant, entries, getId).reduce(_||_)
    }
    // Queue one pending Finish per accepted Grant that requires an ack,
    // recording which manager it must be routed back to.
    val q = Module(new FinishQueue(outstanding))
    q.io.enq.valid := io.grant.fire() && g.requiresAck() && (!g.hasMultibeatData() || done)
    q.io.enq.bits := g.makeFinish()
    q.io.enq.bits.manager_id := io.grant.bits.header.src

    io.finish.bits.header.src := UInt(srcId)
    io.finish.bits.header.dst := q.io.deq.bits.manager_id
    io.finish.bits.payload := q.io.deq.bits
    io.finish.valid := q.io.deq.valid
    q.io.deq.ready := io.finish.ready

    // Only accept a Grant when there is room to queue its Finish (or none
    // is required) and the refill consumer is ready.
    io.refill.valid := (q.io.enq.ready || !g.requiresAck()) && io.grant.valid
    io.refill.bits := g
    io.grant.ready := (q.io.enq.ready || !g.requiresAck()) && io.refill.ready
    io.ready := q.io.enq.ready
  }
}
|
||||
|
||||
/** A small queue of pending [[uncore.FinishToDst]] messages awaiting dispatch. */
class FinishQueue(entries: Int)(implicit p: Parameters) extends Queue(new FinishToDst()(p), entries)
|
||||
|
||||
/** A port to convert [[uncore.ClientTileLinkIO]].flip into [[uncore.TileLinkIO]]
  *
  * Creates network headers for [[uncore.Acquire]] and [[uncore.Release]] messages,
  * calculating header.dst and filling in header.src.
  * Strips headers from [[uncore.Probe Probes]].
  * Passes [[uncore.GrantFromSrc]] and accepts [[uncore.FinishToDst]] in response,
  * setting up the headers for each.
  *
  * @param clientId network port id of this agent
  * @param addrConvert how a physical address maps to a destination manager port id
  */
class ClientTileLinkNetworkPort(clientId: Int, addrConvert: UInt => UInt)
    (implicit p: Parameters) extends TLModule()(p) {
  val io = new Bundle {
    val client = new ClientTileLinkIO().flip
    val network = new TileLinkIO
  }

  // Outbound channels gain headers; Finish routes by its recorded manager_id.
  val acq_with_header = ClientTileLinkHeaderCreator(io.client.acquire, clientId, addrConvert)
  val rel_with_header = ClientTileLinkHeaderCreator(io.client.release, clientId, addrConvert)
  val fin_with_header = ClientTileLinkHeaderCreator(io.client.finish, clientId)
  // Inbound channels lose their headers.
  val prb_without_header = DecoupledLogicalNetworkIOUnwrapper(io.network.probe)
  val gnt_without_header = DecoupledLogicalNetworkIOUnwrapper(io.network.grant)

  io.network.acquire <> acq_with_header
  io.network.release <> rel_with_header
  io.network.finish <> fin_with_header
  io.client.probe <> prb_without_header
  // Preserve the granting manager's id so the client can address its Finish.
  io.client.grant.bits.manager_id := io.network.grant.bits.header.src
  io.client.grant <> gnt_without_header
}
|
||||
|
||||
/** A port to convert [[uncore.ClientUncachedTileLinkIO]].flip into [[uncore.TileLinkIO]]
|
||||
*
|
||||
* Creates network headers for [[uncore.Acquire]] and [[uncore.Release]] messages,
|
||||
* calculating header.dst and filling in header.src.
|
||||
* Responds to [[uncore.Grant]] by automatically issuing [[uncore.Finish]] to the granting managers.
|
||||
*
|
||||
* @param clientId network port id of this agent
|
||||
* @param addrConvert how a physical address maps to a destination manager port id
|
||||
*/
|
||||
class ClientUncachedTileLinkNetworkPort(clientId: Int, addrConvert: UInt => UInt)
|
||||
(implicit p: Parameters) extends TLModule()(p) {
|
||||
val io = new Bundle {
|
||||
val client = new ClientUncachedTileLinkIO().flip
|
||||
val network = new TileLinkIO
|
||||
}
|
||||
|
||||
val finisher = Module(new FinishUnit(clientId))
|
||||
finisher.io.grant <> io.network.grant
|
||||
io.network.finish <> finisher.io.finish
|
||||
|
||||
val acq_with_header = ClientTileLinkHeaderCreator(io.client.acquire, clientId, addrConvert)
|
||||
val gnt_without_header = finisher.io.refill
|
||||
|
||||
io.network.acquire.bits := acq_with_header.bits
|
||||
io.network.acquire.valid := acq_with_header.valid && finisher.io.ready
|
||||
acq_with_header.ready := io.network.acquire.ready && finisher.io.ready
|
||||
io.client.grant <> gnt_without_header
|
||||
io.network.probe.ready := Bool(false)
|
||||
io.network.release.valid := Bool(false)
|
||||
}
|
||||
|
||||
object ClientTileLinkHeaderCreator {
|
||||
def apply[T <: ClientToManagerChannel with HasManagerId](
|
||||
in: DecoupledIO[T],
|
||||
clientId: Int)
|
||||
(implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
|
||||
val out = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
|
||||
out.bits.payload := in.bits
|
||||
out.bits.header.src := UInt(clientId)
|
||||
out.bits.header.dst := in.bits.manager_id
|
||||
out.valid := in.valid
|
||||
in.ready := out.ready
|
||||
out
|
||||
}
|
||||
def apply[T <: ClientToManagerChannel with HasCacheBlockAddress](
|
||||
in: DecoupledIO[T],
|
||||
clientId: Int,
|
||||
addrConvert: UInt => UInt)
|
||||
(implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
|
||||
val out = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
|
||||
out.bits.payload := in.bits
|
||||
out.bits.header.src := UInt(clientId)
|
||||
out.bits.header.dst := addrConvert(in.bits.addr_block)
|
||||
out.valid := in.valid
|
||||
in.ready := out.ready
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
/** A port to convert [[uncore.ManagerTileLinkIO]].flip into [[uncore.TileLinkIO]].flip
|
||||
*
|
||||
* Creates network headers for [[uncore.Probe]] and [[uncore.Grant]] messagess,
|
||||
* calculating header.dst and filling in header.src.
|
||||
* Strips headers from [[uncore.Acquire]], [[uncore.Release]] and [[uncore.Finish]],
|
||||
* but supplies client_id instead.
|
||||
*
|
||||
* @param managerId the network port id of this agent
|
||||
* @param idConvert how a sharer id maps to a destination client port id
|
||||
*/
|
||||
class ManagerTileLinkNetworkPort(managerId: Int, idConvert: UInt => UInt)
|
||||
(implicit p: Parameters) extends TLModule()(p) {
|
||||
val io = new Bundle {
|
||||
val manager = new ManagerTileLinkIO().flip
|
||||
val network = new TileLinkIO().flip
|
||||
}
|
||||
io.network.grant <> ManagerTileLinkHeaderCreator(io.manager.grant, managerId, (u: UInt) => u)
|
||||
io.network.probe <> ManagerTileLinkHeaderCreator(io.manager.probe, managerId, idConvert)
|
||||
io.manager.acquire <> DecoupledLogicalNetworkIOUnwrapper(io.network.acquire)
|
||||
io.manager.acquire.bits.client_id := io.network.acquire.bits.header.src
|
||||
io.manager.release <> DecoupledLogicalNetworkIOUnwrapper(io.network.release)
|
||||
io.manager.release.bits.client_id := io.network.release.bits.header.src
|
||||
io.manager.finish <> DecoupledLogicalNetworkIOUnwrapper(io.network.finish)
|
||||
}
|
||||
|
||||
object ManagerTileLinkHeaderCreator {
|
||||
def apply[T <: ManagerToClientChannel with HasClientId](
|
||||
in: DecoupledIO[T],
|
||||
managerId: Int,
|
||||
idConvert: UInt => UInt)
|
||||
(implicit p: Parameters): DecoupledIO[LogicalNetworkIO[T]] = {
|
||||
val out = Wire(new DecoupledIO(new LogicalNetworkIO(in.bits)))
|
||||
out.bits.payload := in.bits
|
||||
out.bits.header.src := UInt(managerId)
|
||||
out.bits.header.dst := idConvert(in.bits.client_id)
|
||||
out.valid := in.valid
|
||||
in.ready := out.ready
|
||||
out
|
||||
}
|
||||
}
|
||||
422
src/main/scala/uncore/unittests/Drivers.scala
Normal file
422
src/main/scala/uncore/unittests/Drivers.scala
Normal file
@@ -0,0 +1,422 @@
|
||||
package uncore.unittests
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import uncore.tilelink._
|
||||
import uncore.constants._
|
||||
import uncore.util._
|
||||
import cde.Parameters
|
||||
|
||||
abstract class Driver(implicit p: Parameters) extends TLModule()(p) {
|
||||
val io = new Bundle {
|
||||
val mem = new ClientUncachedTileLinkIO
|
||||
val start = Bool(INPUT)
|
||||
val finished = Bool(OUTPUT)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that single-beat Gets of decreasing size return subsets of the
|
||||
* data returned by larger Gets
|
||||
*/
|
||||
class GetMultiWidthDriver(implicit p: Parameters) extends Driver()(p) {
|
||||
val s_start :: s_send :: s_recv :: s_done :: Nil = Enum(Bits(), 4)
|
||||
val state = Reg(init = s_start)
|
||||
|
||||
val w = 64
|
||||
val initialSize = UInt(log2Ceil(w/8))
|
||||
val size = Reg(UInt(width = log2Ceil(log2Ceil(w/8)+1)))
|
||||
val ref = Reg(UInt(width = w))
|
||||
val bytemask = (UInt(1) << (UInt(1) << size)) - UInt(1)
|
||||
val bitmask = FillInterleaved(8, bytemask)
|
||||
|
||||
io.mem.acquire.valid := (state === s_send)
|
||||
io.mem.acquire.bits := Get(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(0),
|
||||
addr_beat = UInt(0),
|
||||
addr_byte = UInt(0),
|
||||
operand_size = size,
|
||||
alloc = Bool(false))
|
||||
io.mem.grant.ready := (state === s_recv)
|
||||
|
||||
when (state === s_start && io.start) {
|
||||
size := initialSize
|
||||
state := s_send
|
||||
}
|
||||
|
||||
when (io.mem.acquire.fire()) { state := s_recv }
|
||||
when (io.mem.grant.fire()) {
|
||||
when (size === initialSize) { ref := io.mem.grant.bits.data }
|
||||
size := size - UInt(1)
|
||||
state := Mux(size === UInt(0), s_done, s_send)
|
||||
}
|
||||
|
||||
io.finished := state === s_done
|
||||
|
||||
assert(!io.mem.grant.valid || size === initialSize ||
|
||||
(io.mem.grant.bits.data & bitmask) === (ref & bitmask),
|
||||
"GetMultiWidth: smaller get does not match larger get")
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that single-beat Gets across a range of memory return
|
||||
* the expected data.
|
||||
* @param expected The values of the data expected to be read.
|
||||
* Each element is the data for one beat.
|
||||
*/
|
||||
class GetSweepDriver(expected: Seq[BigInt])
|
||||
(implicit p: Parameters) extends Driver()(p) {
|
||||
|
||||
val s_start :: s_send :: s_recv :: s_done :: Nil = Enum(Bits(), 4)
|
||||
val state = Reg(init = s_start)
|
||||
|
||||
val nReqs = expected.size
|
||||
val (req_cnt, req_done) = Counter(io.mem.grant.fire(), nReqs)
|
||||
|
||||
when (state === s_start && io.start) { state := s_send }
|
||||
when (io.mem.acquire.fire()) { state := s_recv }
|
||||
when (io.mem.grant.fire()) { state := s_send }
|
||||
when (req_done) { state := s_done }
|
||||
|
||||
val (addr_block, addr_beat) = if (nReqs > tlDataBeats) {
|
||||
(req_cnt(log2Up(nReqs) - 1, tlBeatAddrBits),
|
||||
req_cnt(tlBeatAddrBits - 1, 0))
|
||||
} else {
|
||||
(UInt(0), req_cnt)
|
||||
}
|
||||
|
||||
val exp_data = Vec(expected.map(e => UInt(e, tlDataBits)))
|
||||
|
||||
io.mem.acquire.valid := (state === s_send)
|
||||
io.mem.acquire.bits := Get(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = addr_block,
|
||||
addr_beat = addr_beat)
|
||||
io.mem.grant.ready := (state === s_recv)
|
||||
io.finished := state === s_done
|
||||
|
||||
assert(!io.mem.grant.valid || io.mem.grant.bits.data === exp_data(req_cnt),
|
||||
"GetSweep: data does not match expected")
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that multi-beat GetBlocks across a range of memory return
|
||||
* the expected data.
|
||||
* @param expected The values of the data expected to be read.
|
||||
* Each element is the data for one beat.
|
||||
*/
|
||||
class GetBlockSweepDriver(expected: Seq[BigInt])
|
||||
(implicit p: Parameters) extends Driver()(p) {
|
||||
val s_start :: s_send :: s_recv :: s_done :: Nil = Enum(Bits(), 4)
|
||||
val state = Reg(init = s_start)
|
||||
|
||||
val nReqs = ((expected.size - 1) / tlDataBeats + 1) * tlDataBeats
|
||||
val (req_cnt, req_done) = Counter(io.mem.grant.fire(), nReqs)
|
||||
val (addr_beat, beats_done) = Counter(io.mem.grant.fire(), tlDataBeats)
|
||||
|
||||
val tlBlockOffset = tlByteAddrBits + tlBeatAddrBits
|
||||
val addr_block =
|
||||
if (nReqs > tlDataBeats) req_cnt(log2Up(nReqs) - 1, tlBlockOffset)
|
||||
else UInt(0)
|
||||
|
||||
io.mem.acquire.valid := (state === s_send)
|
||||
io.mem.acquire.bits := GetBlock(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = addr_block)
|
||||
io.mem.grant.ready := (state === s_recv)
|
||||
io.finished := state === s_done
|
||||
|
||||
when (state === s_start && io.start) { state := s_send }
|
||||
when (io.mem.acquire.fire()) { state := s_recv }
|
||||
when (beats_done) { state := s_send }
|
||||
when (req_done) { state := s_done }
|
||||
|
||||
val exp_data = Vec(expected.map(e => UInt(e, tlDataBits)))
|
||||
|
||||
assert(!io.mem.grant.valid || req_cnt >= UInt(expected.size) ||
|
||||
io.mem.grant.bits.data === exp_data(req_cnt),
|
||||
"GetBlockSweep: data does not match expected")
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that single-beat Puts across a range of memory persists correctly.
|
||||
* @param n the number of beats to put
|
||||
*/
|
||||
class PutSweepDriver(val n: Int)(implicit p: Parameters) extends Driver()(p) {
|
||||
val (s_idle :: s_put_req :: s_put_resp ::
|
||||
s_get_req :: s_get_resp :: s_done :: Nil) = Enum(Bits(), 6)
|
||||
val state = Reg(init = s_idle)
|
||||
|
||||
val (put_cnt, put_done) = Counter(state === s_put_resp && io.mem.grant.valid, n)
|
||||
val (get_cnt, get_done) = Counter(state === s_get_resp && io.mem.grant.valid, n)
|
||||
|
||||
val (put_block, put_beat) = if (n > tlDataBeats) {
|
||||
(put_cnt(log2Up(n) - 1, tlBeatAddrBits),
|
||||
put_cnt(tlBeatAddrBits - 1, 0))
|
||||
} else {
|
||||
(UInt(0), put_cnt)
|
||||
}
|
||||
val (get_block, get_beat) = if (n > tlDataBeats) {
|
||||
(get_cnt(log2Up(n) - 1, tlBeatAddrBits),
|
||||
get_cnt(tlBeatAddrBits - 1, 0))
|
||||
} else {
|
||||
(UInt(0), get_cnt)
|
||||
}
|
||||
|
||||
val dataRep = (tlDataBits - 1) / log2Up(n) + 1
|
||||
val put_data = Fill(dataRep, put_cnt)(tlDataBits - 1, 0)
|
||||
val get_data = Fill(dataRep, get_cnt)(tlDataBits - 1, 0)
|
||||
|
||||
io.mem.acquire.valid := state.isOneOf(s_put_req, s_get_req)
|
||||
io.mem.acquire.bits := Mux(state === s_put_req,
|
||||
Put(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = put_block,
|
||||
addr_beat = put_beat,
|
||||
data = put_data),
|
||||
Get(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = get_block,
|
||||
addr_beat = get_beat))
|
||||
io.mem.grant.ready := state.isOneOf(s_put_resp, s_get_resp)
|
||||
|
||||
when (state === s_idle && io.start) { state := s_put_req }
|
||||
when (state === s_put_req && io.mem.acquire.ready) { state := s_put_resp }
|
||||
when (state === s_put_resp && io.mem.grant.valid) {
|
||||
state := Mux(put_done, s_get_req, s_put_req)
|
||||
}
|
||||
when (state === s_get_req && io.mem.acquire.ready) { state := s_get_resp }
|
||||
when (state === s_get_resp && io.mem.grant.valid) {
|
||||
state := Mux(get_done, s_done, s_get_req)
|
||||
}
|
||||
|
||||
io.finished := (state === s_done)
|
||||
|
||||
assert(!io.mem.grant.valid || !io.mem.grant.bits.hasData() ||
|
||||
io.mem.grant.bits.data === get_data,
|
||||
"PutSweepDriver: data does not match")
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that write-masked single-beat puts work correctly by putting
|
||||
* data with steadily smaller write-masks to the same beat.
|
||||
* @param minBytes the smallest number of bytes that can be in the writemask
|
||||
*/
|
||||
class PutMaskDriver(minBytes: Int = 1)(implicit p: Parameters) extends Driver()(p) {
|
||||
val (s_idle :: s_put_req :: s_put_resp ::
|
||||
s_get_req :: s_get_resp :: s_done :: Nil) = Enum(Bits(), 6)
|
||||
val state = Reg(init = s_idle)
|
||||
val nbytes = Reg(UInt(width = log2Up(tlWriteMaskBits) + 1))
|
||||
val wmask = (UInt(1) << nbytes) - UInt(1)
|
||||
val wdata = Fill(tlDataBits / 8, Wire(UInt(width = 8), init = nbytes))
|
||||
// TL data bytes down to minBytes logarithmically by 2
|
||||
val expected = (log2Ceil(tlDataBits / 8) to log2Ceil(minBytes) by -1)
|
||||
.map(1 << _).foldLeft(UInt(0, tlDataBits)) {
|
||||
// Change the lower nbytes of the value
|
||||
(value, nbytes) => {
|
||||
val mask = UInt((BigInt(1) << (nbytes * 8)) - BigInt(1), tlDataBits)
|
||||
val wval = Fill(tlDataBits / 8, UInt(nbytes, 8))
|
||||
(value & ~mask) | (wval & mask)
|
||||
}
|
||||
}
|
||||
|
||||
when (state === s_idle && io.start) {
|
||||
state := s_put_req
|
||||
nbytes := UInt(8)
|
||||
}
|
||||
when (state === s_put_req && io.mem.acquire.ready) {
|
||||
state := s_put_resp
|
||||
}
|
||||
when (state === s_put_resp && io.mem.grant.valid) {
|
||||
nbytes := nbytes >> UInt(1)
|
||||
state := Mux(nbytes === UInt(minBytes), s_get_req, s_put_req)
|
||||
}
|
||||
when (state === s_get_req && io.mem.acquire.ready) {
|
||||
state := s_get_resp
|
||||
}
|
||||
when (state === s_get_resp && io.mem.grant.valid) {
|
||||
state := s_done
|
||||
}
|
||||
|
||||
io.finished := (state === s_done)
|
||||
io.mem.acquire.valid := state.isOneOf(s_put_req, s_get_req)
|
||||
io.mem.acquire.bits := Mux(state === s_put_req,
|
||||
Put(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(0),
|
||||
addr_beat = UInt(0),
|
||||
data = wdata,
|
||||
wmask = Some(wmask)),
|
||||
Get(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(0),
|
||||
addr_beat = UInt(0)))
|
||||
io.mem.grant.ready := state.isOneOf(s_put_resp, s_get_resp)
|
||||
|
||||
assert(!io.mem.grant.valid || state =/= s_get_resp ||
|
||||
io.mem.grant.bits.data === expected,
|
||||
"PutMask: data does not match expected")
|
||||
}
|
||||
|
||||
class PutBlockSweepDriver(val n: Int)(implicit p: Parameters)
|
||||
extends Driver()(p) {
|
||||
val (s_idle :: s_put_req :: s_put_resp ::
|
||||
s_get_req :: s_get_resp :: s_done :: Nil) = Enum(Bits(), 6)
|
||||
val state = Reg(init = s_idle)
|
||||
|
||||
val (put_beat, put_beat_done) = Counter(
|
||||
state === s_put_req && io.mem.acquire.ready, tlDataBeats)
|
||||
val (put_cnt, put_done) = Counter(
|
||||
state === s_put_resp && io.mem.grant.valid, n)
|
||||
val (get_beat, get_beat_done) = Counter(
|
||||
state === s_get_resp && io.mem.grant.valid, tlDataBeats)
|
||||
val (get_cnt, get_done) = Counter(get_beat_done, n)
|
||||
|
||||
val dataRep = (tlDataBits - 1) / (log2Up(n) + tlBeatAddrBits) + 1
|
||||
val put_data = Fill(dataRep, Cat(put_cnt, put_beat))(tlDataBits - 1, 0)
|
||||
val get_data = Fill(dataRep, Cat(get_cnt, get_beat))(tlDataBits - 1, 0)
|
||||
|
||||
when (state === s_idle && io.start) { state := s_put_req }
|
||||
when (put_beat_done) { state := s_put_resp }
|
||||
when (state === s_put_resp && io.mem.grant.valid) {
|
||||
state := Mux(put_done, s_get_req, s_put_req)
|
||||
}
|
||||
when (state === s_get_req && io.mem.acquire.ready) { state := s_get_resp }
|
||||
when (get_beat_done) { state := Mux(get_done, s_done, s_get_req) }
|
||||
|
||||
val put_acquire = PutBlock(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = put_cnt,
|
||||
addr_beat = put_beat,
|
||||
data = put_data)
|
||||
|
||||
val get_acquire = GetBlock(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = get_cnt)
|
||||
|
||||
io.finished := (state === s_done)
|
||||
io.mem.acquire.valid := state.isOneOf(s_put_req, s_get_req)
|
||||
io.mem.acquire.bits := Mux(state === s_put_req, put_acquire, get_acquire)
|
||||
io.mem.grant.ready := state.isOneOf(s_put_resp, s_get_resp)
|
||||
|
||||
assert(!io.mem.grant.valid || state =/= s_get_resp ||
|
||||
io.mem.grant.bits.data === get_data,
|
||||
"PutBlockSweep: data does not match expected")
|
||||
}
|
||||
|
||||
class PutAtomicDriver(implicit p: Parameters) extends Driver()(p) {
|
||||
val s_idle :: s_put :: s_atomic :: s_get :: s_done :: Nil = Enum(Bits(), 5)
|
||||
val state = Reg(init = s_idle)
|
||||
val sending = Reg(init = Bool(false))
|
||||
|
||||
val put_acquire = Put(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(0),
|
||||
addr_beat = UInt(0),
|
||||
// Put 15 in bytes 7:4
|
||||
data = UInt(15L << 32),
|
||||
wmask = Some(UInt(0xf0)))
|
||||
|
||||
val amo_acquire = PutAtomic(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(0),
|
||||
addr_beat = UInt(0),
|
||||
addr_byte = UInt(4),
|
||||
atomic_opcode = M_XA_ADD,
|
||||
operand_size = UInt(log2Ceil(32 / 8)),
|
||||
data = UInt(3L << 32))
|
||||
|
||||
val get_acquire = Get(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(0),
|
||||
addr_beat = UInt(0))
|
||||
|
||||
io.finished := (state === s_done)
|
||||
io.mem.acquire.valid := sending
|
||||
io.mem.acquire.bits := MuxLookup(state, get_acquire, Seq(
|
||||
s_put -> put_acquire,
|
||||
s_atomic -> amo_acquire,
|
||||
s_get -> get_acquire))
|
||||
io.mem.grant.ready := !sending
|
||||
|
||||
when (io.mem.acquire.fire()) { sending := Bool(false) }
|
||||
|
||||
when (state === s_idle && io.start) {
|
||||
state := s_put
|
||||
sending := Bool(true)
|
||||
}
|
||||
when (io.mem.grant.fire()) {
|
||||
when (state === s_put) { sending := Bool(true); state := s_atomic }
|
||||
when (state === s_atomic) { sending := Bool(true); state := s_get }
|
||||
when (state === s_get) { state := s_done }
|
||||
}
|
||||
|
||||
assert(!io.mem.grant.valid || state =/= s_get ||
|
||||
io.mem.grant.bits.data(63, 32) === UInt(18))
|
||||
}
|
||||
|
||||
class PrefetchDriver(implicit p: Parameters) extends Driver()(p) {
|
||||
val s_idle :: s_put_pf :: s_get_pf :: s_done :: Nil = Enum(Bits(), 4)
|
||||
val state = Reg(init = s_idle)
|
||||
val sending = Reg(init = Bool(false))
|
||||
|
||||
when (state === s_idle) {
|
||||
sending := Bool(true)
|
||||
state := s_put_pf
|
||||
}
|
||||
|
||||
when (io.mem.acquire.fire()) { sending := Bool(false) }
|
||||
when (io.mem.grant.fire()) {
|
||||
when (state === s_put_pf) { sending := Bool(true); state := s_get_pf }
|
||||
when (state === s_get_pf) { state := s_done }
|
||||
}
|
||||
|
||||
io.finished := (state === s_done)
|
||||
io.mem.acquire.valid := sending
|
||||
io.mem.acquire.bits := Mux(state === s_put_pf,
|
||||
PutPrefetch(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(0)),
|
||||
GetPrefetch(
|
||||
client_xact_id = UInt(0),
|
||||
addr_block = UInt(0)))
|
||||
io.mem.grant.ready := !sending
|
||||
}
|
||||
|
||||
class DriverSet(driverGen: Parameters => Seq[Driver])(implicit p: Parameters)
|
||||
extends Driver()(p) {
|
||||
val s_start :: s_run :: s_done :: Nil = Enum(Bits(), 3)
|
||||
val state = Reg(init = s_start)
|
||||
|
||||
val drivers = driverGen(p)
|
||||
val idx = Reg(init = UInt(0, log2Up(drivers.size)))
|
||||
val finished = Wire(init = Bool(false))
|
||||
|
||||
when (state === s_start && io.start) { state := s_run }
|
||||
when (state === s_run && finished) {
|
||||
when (idx === UInt(drivers.size - 1)) { state := s_done }
|
||||
idx := idx + UInt(1)
|
||||
}
|
||||
|
||||
io.finished := state === s_done
|
||||
|
||||
io.mem.acquire.valid := Bool(false)
|
||||
io.mem.grant.ready := Bool(false)
|
||||
|
||||
drivers.zipWithIndex.foreach { case (driv, i) =>
|
||||
val me = idx === UInt(i)
|
||||
|
||||
driv.io.start := me && state === s_run
|
||||
driv.io.mem.acquire.ready := io.mem.acquire.ready && me
|
||||
driv.io.mem.grant.valid := io.mem.grant.valid && me
|
||||
driv.io.mem.grant.bits := io.mem.grant.bits
|
||||
|
||||
when (me) {
|
||||
io.mem.acquire.valid := driv.io.mem.acquire.valid
|
||||
io.mem.acquire.bits := driv.io.mem.acquire.bits
|
||||
io.mem.grant.ready := driv.io.mem.grant.ready
|
||||
finished := driv.io.finished
|
||||
}
|
||||
}
|
||||
}
|
||||
85
src/main/scala/uncore/unittests/Tests.scala
Normal file
85
src/main/scala/uncore/unittests/Tests.scala
Normal file
@@ -0,0 +1,85 @@
|
||||
package uncore.unittests
|
||||
|
||||
import Chisel._
|
||||
import junctions._
|
||||
import junctions.unittests._
|
||||
import uncore.devices._
|
||||
import uncore.tilelink._
|
||||
import uncore.converters._
|
||||
import cde.Parameters
|
||||
|
||||
class SmiConverterTest(implicit val p: Parameters) extends UnitTest
|
||||
with HasTileLinkParameters {
|
||||
val outermostParams = p.alterPartial({ case TLId => "Outermost" })
|
||||
|
||||
val smiWidth = 32
|
||||
val smiDepth = 64
|
||||
val tlDepth = (smiWidth * smiDepth) / tlDataBits
|
||||
|
||||
val smimem = Module(new SmiMem(smiWidth, smiDepth))
|
||||
val conv = Module(new SmiIOTileLinkIOConverter(
|
||||
smiWidth, log2Up(smiDepth))(outermostParams))
|
||||
val driver = Module(new DriverSet(
|
||||
(driverParams: Parameters) => {
|
||||
implicit val p = driverParams
|
||||
Seq(
|
||||
Module(new PutSweepDriver(tlDepth)),
|
||||
Module(new PutMaskDriver(smiWidth / 8)),
|
||||
Module(new PutBlockSweepDriver(tlDepth / tlDataBeats)),
|
||||
Module(new GetMultiWidthDriver))
|
||||
})(outermostParams))
|
||||
|
||||
conv.io.tl <> driver.io.mem
|
||||
smimem.io <> conv.io.smi
|
||||
driver.io.start := io.start
|
||||
io.finished := driver.io.finished
|
||||
}
|
||||
|
||||
class ROMSlaveTest(implicit p: Parameters) extends UnitTest {
|
||||
implicit val testName = "ROMSlaveTest"
|
||||
val romdata = Seq(
|
||||
BigInt("01234567deadbeef", 16),
|
||||
BigInt("ab32fee8d00dfeed", 16))
|
||||
val rombytes = romdata.map(_.toByteArray.reverse).flatten
|
||||
val rom = Module(new ROMSlave(rombytes))
|
||||
val driver = Module(new DriverSet(
|
||||
(driverParams: Parameters) => {
|
||||
implicit val p = driverParams
|
||||
Seq(
|
||||
Module(new GetMultiWidthDriver),
|
||||
Module(new GetSweepDriver(romdata)),
|
||||
Module(new GetBlockSweepDriver(romdata)))
|
||||
}))
|
||||
rom.io <> driver.io.mem
|
||||
driver.io.start := io.start
|
||||
io.finished := driver.io.finished
|
||||
}
|
||||
|
||||
class TileLinkRAMTest(implicit val p: Parameters)
|
||||
extends UnitTest with HasTileLinkParameters {
|
||||
|
||||
val depth = 2 * tlDataBeats
|
||||
val ram = Module(new TileLinkTestRAM(depth))
|
||||
val driver = Module(new DriverSet(
|
||||
(driverParams: Parameters) => {
|
||||
implicit val p = driverParams
|
||||
Seq(
|
||||
Module(new PutSweepDriver(depth)),
|
||||
Module(new PutMaskDriver),
|
||||
Module(new PutAtomicDriver),
|
||||
Module(new PutBlockSweepDriver(depth / tlDataBeats)),
|
||||
Module(new PrefetchDriver),
|
||||
Module(new GetMultiWidthDriver))
|
||||
}))
|
||||
ram.io <> driver.io.mem
|
||||
driver.io.start := io.start
|
||||
io.finished := driver.io.finished
|
||||
}
|
||||
|
||||
object UncoreUnitTests {
|
||||
def apply(implicit p: Parameters): Seq[UnitTest] =
|
||||
Seq(
|
||||
Module(new SmiConverterTest),
|
||||
Module(new ROMSlaveTest),
|
||||
Module(new TileLinkRAMTest))
|
||||
}
|
||||
105
src/main/scala/uncore/util/AmoAlu.scala
Normal file
105
src/main/scala/uncore/util/AmoAlu.scala
Normal file
@@ -0,0 +1,105 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.util
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import cde.Parameters
|
||||
import uncore.constants._
|
||||
|
||||
class StoreGen(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) {
|
||||
val size = typ(log2Up(log2Up(maxSize)+1)-1,0)
|
||||
def misaligned =
|
||||
(addr & ((UInt(1) << size) - UInt(1))(log2Up(maxSize)-1,0)).orR
|
||||
|
||||
def mask = {
|
||||
var res = UInt(1)
|
||||
for (i <- 0 until log2Up(maxSize)) {
|
||||
val upper = Mux(addr(i), res, UInt(0)) | Mux(size >= UInt(i+1), UInt((BigInt(1) << (1 << i))-1), UInt(0))
|
||||
val lower = Mux(addr(i), UInt(0), res)
|
||||
res = Cat(upper, lower)
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
protected def genData(i: Int): UInt =
|
||||
if (i >= log2Up(maxSize)) dat
|
||||
else Mux(size === UInt(i), Fill(1 << (log2Up(maxSize)-i), dat((8 << i)-1,0)), genData(i+1))
|
||||
|
||||
def data = genData(0)
|
||||
def wordData = genData(2)
|
||||
}
|
||||
|
||||
class StoreGenAligned(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) extends StoreGen(typ, addr, dat, maxSize) {
|
||||
override def genData(i: Int) = dat
|
||||
}
|
||||
|
||||
class LoadGen(typ: UInt, signed: Bool, addr: UInt, dat: UInt, zero: Bool, maxSize: Int) {
|
||||
private val size = new StoreGen(typ, addr, dat, maxSize).size
|
||||
|
||||
private def genData(logMinSize: Int): UInt = {
|
||||
var res = dat
|
||||
for (i <- log2Up(maxSize)-1 to logMinSize by -1) {
|
||||
val pos = 8 << i
|
||||
val shifted = Mux(addr(i), res(2*pos-1,pos), res(pos-1,0))
|
||||
val doZero = Bool(i == 0) && zero
|
||||
val zeroed = Mux(doZero, UInt(0), shifted)
|
||||
res = Cat(Mux(size === UInt(i) || doZero, Fill(8*maxSize-pos, signed && zeroed(pos-1)), res(8*maxSize-1,pos)), zeroed)
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
def wordData = genData(2)
|
||||
def data = genData(0)
|
||||
}
|
||||
|
||||
class AMOALU(operandBits: Int, rhsIsAligned: Boolean = false)(implicit p: Parameters) extends Module {
|
||||
require(operandBits == 32 || operandBits == 64)
|
||||
val io = new Bundle {
|
||||
val addr = Bits(INPUT, log2Ceil(operandBits/8))
|
||||
val cmd = Bits(INPUT, M_SZ)
|
||||
val typ = Bits(INPUT, log2Ceil(log2Ceil(operandBits/8) + 1))
|
||||
val lhs = Bits(INPUT, operandBits)
|
||||
val rhs = Bits(INPUT, operandBits)
|
||||
val out = Bits(OUTPUT, operandBits)
|
||||
}
|
||||
|
||||
val storegen =
|
||||
if(rhsIsAligned) new StoreGenAligned(io.typ, io.addr, io.rhs, operandBits/8)
|
||||
else new StoreGen(io.typ, io.addr, io.rhs, operandBits/8)
|
||||
val rhs = storegen.wordData
|
||||
|
||||
val sgned = io.cmd === M_XA_MIN || io.cmd === M_XA_MAX
|
||||
val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU
|
||||
val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU
|
||||
|
||||
val adder_out =
|
||||
if (operandBits == 32) io.lhs + rhs
|
||||
else {
|
||||
val mask = ~UInt(0,64) ^ (io.addr(2) << 31)
|
||||
(io.lhs & mask) + (rhs & mask)
|
||||
}
|
||||
|
||||
val less =
|
||||
if (operandBits == 32) Mux(io.lhs(31) === rhs(31), io.lhs < rhs, Mux(sgned, io.lhs(31), io.rhs(31)))
|
||||
else {
|
||||
val word = !io.typ(0)
|
||||
val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63))
|
||||
val cmp_rhs = Mux(word && !io.addr(2), rhs(31), rhs(63))
|
||||
val lt_lo = io.lhs(31,0) < rhs(31,0)
|
||||
val lt_hi = io.lhs(63,32) < rhs(63,32)
|
||||
val eq_hi = io.lhs(63,32) === rhs(63,32)
|
||||
val lt = Mux(word, Mux(io.addr(2), lt_hi, lt_lo), lt_hi || eq_hi && lt_lo)
|
||||
Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs))
|
||||
}
|
||||
|
||||
val out = Mux(io.cmd === M_XA_ADD, adder_out,
|
||||
Mux(io.cmd === M_XA_AND, io.lhs & rhs,
|
||||
Mux(io.cmd === M_XA_OR, io.lhs | rhs,
|
||||
Mux(io.cmd === M_XA_XOR, io.lhs ^ rhs,
|
||||
Mux(Mux(less, min, max), io.lhs,
|
||||
storegen.data)))))
|
||||
|
||||
val wmask = FillInterleaved(8, storegen.mask)
|
||||
io.out := wmask & out | ~wmask & io.lhs
|
||||
}
|
||||
134
src/main/scala/uncore/util/Counters.scala
Normal file
134
src/main/scala/uncore/util/Counters.scala
Normal file
@@ -0,0 +1,134 @@
|
||||
package uncore.util
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import cde.Parameters
|
||||
|
||||
// Produces 0-width value when counting to 1
|
||||
class ZCounter(val n: Int) {
|
||||
val value = Reg(init=UInt(0, log2Ceil(n)))
|
||||
def inc(): Bool = {
|
||||
if (n == 1) Bool(true)
|
||||
else {
|
||||
val wrap = value === UInt(n-1)
|
||||
value := Mux(Bool(!isPow2(n)) && wrap, UInt(0), value + UInt(1))
|
||||
wrap
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
object ZCounter {
|
||||
def apply(n: Int) = new ZCounter(n)
|
||||
def apply(cond: Bool, n: Int): (UInt, Bool) = {
|
||||
val c = new ZCounter(n)
|
||||
var wrap: Bool = null
|
||||
when (cond) { wrap = c.inc() }
|
||||
(c.value, cond && wrap)
|
||||
}
|
||||
}
|
||||
|
||||
object TwoWayCounter {
|
||||
def apply(up: Bool, down: Bool, max: Int): UInt = {
|
||||
val cnt = Reg(init = UInt(0, log2Up(max+1)))
|
||||
when (up && !down) { cnt := cnt + UInt(1) }
|
||||
when (down && !up) { cnt := cnt - UInt(1) }
|
||||
cnt
|
||||
}
|
||||
}
|
||||
|
||||
class BeatCounterStatus extends Bundle {
|
||||
val idx = UInt()
|
||||
val done = Bool()
|
||||
}
|
||||
|
||||
class TwoWayBeatCounterStatus extends Bundle {
|
||||
val pending = Bool()
|
||||
val up = new BeatCounterStatus()
|
||||
val down = new BeatCounterStatus()
|
||||
}
|
||||
|
||||
/** Utility trait containing wiring functions to keep track of how many data beats have
  * been sent or received over a particular [[uncore.TileLinkChannel]] or pair of channels.
  *
  * Won't count message types that don't have data.
  * Used in [[uncore.XactTracker]] and [[uncore.FinishUnit]].
  */
trait HasDataBeatCounters {
  type HasBeat = TileLinkChannel with HasTileLinkBeatId
  type HasId = TileLinkChannel with HasClientId

  /** Returns the current count on this channel and when a message is done
    * @param inc increment the counter (usually .valid or .fire())
    * @param data the actual channel data
    * @param beat count to return for single-beat messages
    */
  def connectDataBeatCounter[S <: TileLinkChannel](inc: Bool, data: S, beat: UInt) = {
    val multi = data.hasMultibeatData()
    // Counter only advances for multibeat messages; wraps after tlDataBeats beats
    val (multi_cnt, multi_done) = Counter(inc && multi, data.tlDataBeats)
    // Single-beat messages report the caller-supplied beat and complete on `inc` itself
    val cnt = Mux(multi, multi_cnt, beat)
    val done = Mux(multi, multi_done, inc)
    (cnt, done)
  }

  /** Counter for beats on outgoing [[chisel.DecoupledIO]] */
  def connectOutgoingDataBeatCounter[T <: TileLinkChannel](
      out: DecoupledIO[T],
      beat: UInt = UInt(0)): (UInt, Bool) =
    connectDataBeatCounter(out.fire(), out.bits, beat)

  /** Returns done but not cnt. Use the addr_beat subbundle instead of cnt for beats on
    * incoming channels in case of network reordering.
    */
  def connectIncomingDataBeatCounter[T <: TileLinkChannel](in: DecoupledIO[T]): Bool =
    connectDataBeatCounter(in.fire(), in.bits, UInt(0))._2

  /** Counter for beats on incoming DecoupledIO[LogicalNetworkIO[]]s returns done */
  def connectIncomingDataBeatCounterWithHeader[T <: TileLinkChannel](in: DecoupledIO[LogicalNetworkIO[T]]): Bool =
    connectDataBeatCounter(in.fire(), in.bits.payload, UInt(0))._2

  /** If the network might interleave beats from different messages, we need a Vec of counters,
    * one for every outstanding message id that might be interleaved.
    *
    * @param in incoming channel with a logical-network header
    * @param entries number of outstanding message ids to track
    * @param getId mapping from Message to counter id
    * @return Vec of per-id done signals, indexed by the value getId produces
    */
  def connectIncomingDataBeatCountersWithHeader[T <: TileLinkChannel with HasClientTransactionId](
      in: DecoupledIO[LogicalNetworkIO[T]],
      entries: Int,
      getId: LogicalNetworkIO[T] => UInt): Vec[Bool] = {
    Vec((0 until entries).map { i =>
      // Each counter only increments on beats belonging to its own message id
      connectDataBeatCounter(in.fire() && getId(in.bits) === UInt(i), in.bits.payload, UInt(0))._2
    })
  }

  /** Provides counters on two channels, as well a meta-counter that tracks how many
    * messages have been sent over the up channel but not yet responded to over the down channel.
    *
    * Returns Unit; all results are driven into the `status` bundle:
    * pending (outstanding ups with no down yet), and each channel's beat index and done.
    *
    * @param status bundle of status of the counters, written by this function
    * @param up outgoing channel
    * @param down incoming channel
    * @param max max number of outstanding ups with no down
    * @param beat overrides cnts on single-beat messages
    * @param trackUp whether up's message should be tracked
    * @param trackDown whether down's message should be tracked
    */
  def connectTwoWayBeatCounters[T <: TileLinkChannel, S <: TileLinkChannel](
      status: TwoWayBeatCounterStatus,
      up: DecoupledIO[T],
      down: DecoupledIO[S],
      max: Int = 1,
      beat: UInt = UInt(0),
      trackUp: T => Bool = (t: T) => Bool(true),
      trackDown: S => Bool = (s: S) => Bool(true)) {
    val (up_idx, up_done) = connectDataBeatCounter(up.fire() && trackUp(up.bits), up.bits, beat)
    val (dn_idx, dn_done) = connectDataBeatCounter(down.fire() && trackDown(down.bits), down.bits, beat)
    // Meta-counter: +1 per completed up message, -1 per completed down message
    val cnt = TwoWayCounter(up_done, dn_done, max)
    status.pending := cnt > UInt(0)
    status.up.idx := up_idx
    status.up.done := up_done
    status.down.idx := dn_idx
    status.down.done := dn_done
  }
}
|
||||
|
||||
|
||||
56
src/main/scala/uncore/util/Enqueuer.scala
Normal file
56
src/main/scala/uncore/util/Enqueuer.scala
Normal file
@@ -0,0 +1,56 @@
|
||||
package uncore.util
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
import cde.Parameters
|
||||
|
||||
/** Struct for describing per-channel queue depths.
  *
  * A depth of zero means the corresponding channel is passed through unbuffered.
  *
  * @param acq queue depth for the Acquire channel
  * @param prb queue depth for the Probe channel
  * @param rel queue depth for the Release channel
  * @param gnt queue depth for the Grant channel
  * @param fin queue depth for the Finish channel
  */
case class TileLinkDepths(acq: Int, prb: Int, rel: Int, gnt: Int, fin: Int)
|
||||
|
||||
/** Optionally enqueues each [[uncore.TileLinkChannel]] of a full TileLinkIO individually.
  *
  * Any channel whose configured depth is zero is wired straight through
  * without a queue.
  */
class TileLinkEnqueuer(depths: TileLinkDepths)(implicit p: Parameters) extends Module {
  val io = new Bundle {
    val client = new TileLinkIO().flip
    val manager = new TileLinkIO
  }

  // Connect source to sink, interposing a queue only when the depth is positive.
  private def buffer[T <: Data](sink: DecoupledIO[T], source: DecoupledIO[T], depth: Int): Unit = {
    sink <> (if (depth > 0) Queue(source, depth) else source)
  }

  buffer(io.manager.acquire, io.client.acquire,  depths.acq)
  buffer(io.client.probe,    io.manager.probe,   depths.prb)
  buffer(io.manager.release, io.client.release,  depths.rel)
  buffer(io.client.grant,    io.manager.grant,   depths.gnt)
  buffer(io.manager.finish,  io.client.finish,   depths.fin)
}
|
||||
|
||||
/** Factory helpers that wrap a [[TileLinkEnqueuer]] around an existing TileLinkIO. */
object TileLinkEnqueuer {
  /** Buffers `in` with per-channel depths and returns the buffered interface. */
  def apply(in: TileLinkIO, depths: TileLinkDepths)(implicit p: Parameters): TileLinkIO = {
    val enqueuer = Module(new TileLinkEnqueuer(depths))
    enqueuer.io.client <> in
    enqueuer.io.manager
  }

  /** Buffers `in` with the same depth on every channel. */
  def apply(in: TileLinkIO, depth: Int)(implicit p: Parameters): TileLinkIO =
    apply(in, TileLinkDepths(depth, depth, depth, depth, depth))
}
|
||||
|
||||
/** Optionally enqueues each channel of a ClientTileLinkIO individually.
  *
  * Channels whose configured depth is zero are passed through unbuffered.
  */
class ClientTileLinkEnqueuer(depths: TileLinkDepths)(implicit p: Parameters) extends Module {
  val io = new Bundle {
    val inner = new ClientTileLinkIO().flip
    val outer = new ClientTileLinkIO
  }

  // Attach source to sink through a queue only when the requested depth is positive.
  private def buffer[T <: Data](sink: DecoupledIO[T], source: DecoupledIO[T], depth: Int): Unit = {
    sink <> (if (depth > 0) Queue(source, depth) else source)
  }

  buffer(io.outer.acquire, io.inner.acquire, depths.acq)
  buffer(io.inner.probe,   io.outer.probe,   depths.prb)
  buffer(io.outer.release, io.inner.release, depths.rel)
  buffer(io.inner.grant,   io.outer.grant,   depths.gnt)
  buffer(io.outer.finish,  io.inner.finish,  depths.fin)
}
|
||||
|
||||
/** Factory helpers that wrap a [[ClientTileLinkEnqueuer]] around an existing ClientTileLinkIO. */
object ClientTileLinkEnqueuer {
  /** Buffers `in` with per-channel depths and returns the buffered interface. */
  def apply(in: ClientTileLinkIO, depths: TileLinkDepths)(implicit p: Parameters): ClientTileLinkIO = {
    val enqueuer = Module(new ClientTileLinkEnqueuer(depths))
    enqueuer.io.inner <> in
    enqueuer.io.outer
  }

  /** Buffers `in` with the same depth on every channel. */
  def apply(in: ClientTileLinkIO, depth: Int)(implicit p: Parameters): ClientTileLinkIO =
    apply(in, TileLinkDepths(depth, depth, depth, depth, depth))
}
|
||||
25
src/main/scala/uncore/util/Package.scala
Normal file
25
src/main/scala/uncore/util/Package.scala
Normal file
@@ -0,0 +1,25 @@
|
||||
package uncore
|
||||
|
||||
import Chisel._
|
||||
|
||||
package object util {
  /** Adds set-membership tests to UInt: true iff x is equal to any element. */
  implicit class UIntIsOneOf(val x: UInt) extends AnyVal {
    // OR-reduction of per-element equality comparisons
    def isOneOf(s: Seq[UInt]): Bool = s.map(x === _).reduce(_||_)

    // Varargs convenience form of the Seq overload
    def isOneOf(u1: UInt, u2: UInt*): Bool = isOneOf(u1 +: u2.toSeq)
  }

  /** Hardware helpers for Scala Seqs of Chisel data. */
  implicit class SeqToAugmentedSeq[T <: Data](val x: Seq[T]) extends AnyVal {
    // Dynamic (hardware) index into a Scala Seq, built as a balanced mux tree
    // by recursively splitting the sequence at half the rounded-up power of two.
    def apply(idx: UInt): T = {
      if (x.size == 1) {
        x.head
      } else {
        val half = 1 << (log2Ceil(x.size) - 1)
        // Select within each half using only the low-order index bits
        val newIdx = idx & UInt(half - 1)
        Mux(idx >= UInt(half), x.drop(half)(newIdx), x.take(half)(newIdx))
      }
    }

    // Concatenate all elements into one UInt; element 0 lands in the least-significant bits
    def asUInt(): UInt = Cat(x.map(_.asUInt).reverse)
  }
}
|
||||
69
src/main/scala/uncore/util/Serializer.scala
Normal file
69
src/main/scala/uncore/util/Serializer.scala
Normal file
@@ -0,0 +1,69 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
package uncore.util
|
||||
|
||||
import Chisel._
|
||||
import uncore.tilelink._
|
||||
|
||||
/** Serializes the data payload of a wide message into n narrower beats.
  *
  * For n == 1 the module is a pure passthrough. Otherwise, messages whose
  * hasData() is false flow straight through in one cycle, while messages with
  * data are captured into a register and emitted as n narrow beats, with the
  * payload sliced into equal chunks (least-significant chunk first).
  *
  * NOTE(review): statement order below is significant — Chisel last-connect
  * semantics mean the `when` blocks override the earlier defaults.
  *
  * @param gen message type template, must carry a data field
  * @param n number of narrow beats per wide data payload (must divide the data width)
  */
class FlowThroughSerializer[T <: Bundle with HasTileLinkData](gen: T, n: Int) extends Module {
  val io = new Bundle {
    val in = Decoupled(gen).flip
    val out = Decoupled(gen)
    val cnt = UInt(OUTPUT, log2Up(n))   // index of the beat currently being emitted
    val done = Bool(OUTPUT)             // high when the last beat of a message is accepted
  }
  val narrowWidth = io.in.bits.data.getWidth / n
  require(io.in.bits.data.getWidth % narrowWidth == 0)

  if(n == 1) {
    // Degenerate case: nothing to serialize
    io.out <> io.in
    io.cnt := UInt(0)
    io.done := Bool(true)
  } else {
    val cnt = Reg(init=UInt(0, width = log2Up(n)))
    val wrap = cnt === UInt(n-1)
    // Registered copy of the input message while its beats are streamed out
    val rbits = Reg{io.in.bits}
    val active = Reg(init=Bool(false))

    // Pre-slice the registered payload into n narrow chunks, chunk 0 = low bits
    val shifter = Wire(Vec(n, Bits(width = narrowWidth)))
    (0 until n).foreach {
      i => shifter(i) := rbits.data((i+1)*narrowWidth-1,i*narrowWidth)
    }

    // Defaults: idle, pass input through combinationally
    io.done := Bool(false)
    io.cnt := cnt
    io.in.ready := !active
    io.out.valid := active || io.in.valid
    io.out.bits := io.in.bits
    when(!active && io.in.valid) {
      when(io.in.bits.hasData()) {
        // Capture the message; beat 0 may already be consumed this cycle,
        // so the counter starts at 1 if the output fired
        cnt := Mux(io.out.ready, UInt(1), UInt(0))
        rbits := io.in.bits
        active := Bool(true)
      }
      // Data-less messages complete immediately in flow-through mode
      io.done := !io.in.bits.hasData()
    }
    when(active) {
      // While serializing, emit from the register with the current data slice
      io.out.bits := rbits
      io.out.bits.data := shifter(cnt)
      when(io.out.ready) {
        cnt := cnt + UInt(1)
        when(wrap) {
          // Last beat accepted: reset and return to flow-through mode
          cnt := UInt(0)
          io.done := Bool(true)
          active := Bool(false)
        }
      }
    }
  }
}
|
||||
|
||||
/** Factory that interposes a [[FlowThroughSerializer]] on a decoupled stream. */
object FlowThroughSerializer {
  /** Serializes `in` into `n` narrow beats and returns the narrow-beat stream. */
  def apply[T <: Bundle with HasTileLinkData](in: DecoupledIO[T], n: Int): DecoupledIO[T] = {
    val serializer = Module(new FlowThroughSerializer(in.bits, n))
    serializer.io.in.valid := in.valid
    serializer.io.in.bits := in.bits
    in.ready := serializer.io.in.ready
    serializer.io.out
  }
}
|
||||
Reference in New Issue
Block a user