[tl2] convert NBDcache to TL2 (WIP; compiles but untested)
parent 5f1cc19d71
commit 8b908465e0
src/main/scala/rocket/HellaCache.scala (new file, 167 lines)
@@ -0,0 +1,167 @@
// See LICENSE for license details.

package rocket

import Chisel._
import cde.{Parameters, Field}
import diplomacy._
import uncore.tilelink2._
import uncore.agents._
import uncore.constants._
import uncore.tilelink.{TLKey, TLId}
import util.ParameterizedBundle

case class DCacheConfig(
  nMSHRs: Int = 1,
  nSDQ: Int = 17,
  nRPQ: Int = 16,
  nMMIOs: Int = 1)

case object DCacheKey extends Field[DCacheConfig]

trait HasL1HellaCacheParameters extends HasCacheParameters with HasCoreParameters {
  val outerDataBeats = p(TLKey(p(TLId))).dataBeats
  val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat

  val refillCyclesPerBeat = outerDataBits/rowBits
  require(refillCyclesPerBeat == 1)

  val refillCycles = refillCyclesPerBeat*outerDataBeats

  val cacheBlockBytes = p(CacheBlockBytes)
  val lgCacheBlockBytes = log2Up(cacheBlockBytes)

  val wordBits = xLen // really, xLen max
  val wordBytes = wordBits/8
  val wordOffBits = log2Up(wordBytes)
  val beatBytes = cacheBlockBytes / outerDataBeats
  val beatWords = beatBytes / wordBytes
  val beatOffBits = log2Up(beatBytes)
  val idxMSB = untagBits-1
  val idxLSB = blockOffBits
  val offsetmsb = idxLSB-1
  val offsetlsb = wordOffBits
  val rowWords = rowBits/wordBits
  val doNarrowRead = coreDataBits * nWays % rowBits == 0
  val encDataBits = code.width(coreDataBits)
  val encRowBits = encDataBits*rowWords
  val nIOMSHRs = 1
  val lrscCycles = 32 // ISA requires 16-insn LRSC sequences to succeed

  require(isPow2(nSets))
  require(rowBits >= coreDataBits)
  require(rowBits <= outerDataBits)
  require(xLen <= outerDataBits) // would need offset addr for puts if data width < xlen
  require(!usingVM || untagBits <= pgIdxBits)
}

abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module
  with HasL1HellaCacheParameters

abstract class L1HellaCacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
  with HasL1HellaCacheParameters

class L1Metadata(implicit p: Parameters) extends Metadata()(p) with HasL1HellaCacheParameters {
  val coh = new ClientMetadata
}
object L1Metadata {
  def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = {
    val meta = Wire(new L1Metadata)
    meta.tag := tag
    meta.coh := coh
    meta
  }
}

class L1MetaReadReq(implicit p: Parameters) extends MetaReadReq {
  val tag = Bits(width = tagBits)
  override def cloneType = new L1MetaReadReq()(p).asInstanceOf[this.type] //TODO remove
}

class L1MetaWriteReq(implicit p: Parameters) extends
  MetaWriteReq[L1Metadata](new L1Metadata)

trait HasCoreMemOp extends HasCoreParameters {
  val addr = UInt(width = coreMaxAddrBits)
  val tag = Bits(width = dcacheReqTagBits)
  val cmd = Bits(width = M_SZ)
  val typ = Bits(width = MT_SZ)
}

trait HasCoreData extends HasCoreParameters {
  val data = Bits(width = coreDataBits)
}

class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData

class HellaCacheResp(implicit p: Parameters) extends CoreBundle()(p)
    with HasCoreMemOp
    with HasCoreData {
  val replay = Bool()
  val has_data = Bool()
  val data_word_bypass = Bits(width = coreDataBits)
  val store_data = Bits(width = coreDataBits)
}

class AlignmentExceptions extends Bundle {
  val ld = Bool()
  val st = Bool()
}

class HellaCacheExceptions extends Bundle {
  val ma = new AlignmentExceptions
  val pf = new AlignmentExceptions
}

// interface between D$ and processor/DTLB
class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
  val req = Decoupled(new HellaCacheReq)
  val s1_kill = Bool(OUTPUT) // kill previous cycle's req
  val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req
  val s2_nack = Bool(INPUT) // req from two cycles ago is rejected

  val resp = Valid(new HellaCacheResp).flip
  val replay_next = Bool(INPUT)
  val xcpt = (new HellaCacheExceptions).asInput
  val invalidate_lr = Bool(OUTPUT)
  val ordered = Bool(INPUT)
}

abstract class HellaCache(val cfg: DCacheConfig)(implicit val p: Parameters) extends LazyModule {
  val node = TLClientNode(TLClientParameters(
    sourceId = IdRange(0, cfg.nMSHRs + cfg.nMMIOs),
    supportsProbe = TransferSizes(p(CacheBlockBytes))))
  val module: HellaCacheModule
}

class HellaCacheBundle(outer: HellaCache)(implicit p: Parameters) extends Bundle {
  val cpu = (new HellaCacheIO).flip
  val ptw = new TLBPTWIO()
  val mem = outer.node.bundleOut
}

class HellaCacheModule(outer: HellaCache)(implicit val p: Parameters) extends LazyModuleImp(outer)
    with HasL1HellaCacheParameters {
  implicit val cfg = outer.cfg
  val io = new HellaCacheBundle(outer)
  val edge = outer.node.edgesOut(0)
  val tl_out = io.mem(0)

  /* TODO
  edge.manager.managers.foreach { m =>
    if (m.supportsGet) {
      require (m.supportsGet.contains(TransferSizes(1, tlDataBytes)))
      ....etc
    }
  }
  */
}

object HellaCache {
  def apply(cfg: DCacheConfig)(implicit p: Parameters) = {
    if (cfg.nMSHRs == 0) LazyModule(new DCache(cfg))
    else LazyModule(new NonBlockingDCache(cfg))
  }
}
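A minimal usage sketch (not part of this commit) of the factory above: a tile binds DCacheKey in its Parameters and lets HellaCache.apply pick the blocking or non-blocking implementation. `ExampleTile` and the bus-connection comment are illustrative assumptions, not code from this change.

import Chisel._
import cde.Parameters
import diplomacy._

class ExampleTile(implicit p: Parameters) extends LazyModule {
  val dcacheCfg = p(DCacheKey)        // e.g. DCacheConfig(nMSHRs = 0) selects the blocking DCache
  val dcache = HellaCache(dcacheCfg)  // nMSHRs > 0 builds a NonBlockingDCache instead
  // In a real tile the cache's TileLink2 client node would be attached to a bus node,
  // e.g. someBusNode := dcache.node (someBusNode is assumed, not defined here).
  lazy val module = new LazyModuleImp(this) { }
}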
src/main/scala/rocket/ScratchpadSlavePort.scala (new file, 107 lines)
@@ -0,0 +1,107 @@
// See LICENSE for license details.

package rocket

import Chisel._
import Chisel.ImplicitConversions._
import cde.Parameters
import junctions._
import diplomacy._
import uncore.constants._
import uncore.tilelink2._
import uncore.util._

class ScratchpadSlavePort(implicit val p: Parameters) extends LazyModule with HasCoreParameters {
  val node = TLManagerNode(TLManagerPortParameters(
    Seq(TLManagerParameters(
      address = List(AddressSet(0x80000000L, BigInt(p(DataScratchpadSize)-1))),
      regionType = RegionType.UNCACHED,
      executable = true,
      supportsArithmetic = if (p(UseAtomics)) TransferSizes(1, coreDataBytes) else TransferSizes.none,
      supportsLogical = if (p(UseAtomics)) TransferSizes(1, coreDataBytes) else TransferSizes.none,
      supportsPutPartial = TransferSizes(1, coreDataBytes),
      supportsPutFull = TransferSizes(1, coreDataBytes),
      supportsGet = TransferSizes(1, coreDataBytes),
      fifoId = Some(0))), // requests handled in FIFO order
    beatBytes = coreDataBytes,
    minLatency = 1))

  // Make sure this ends up with the same name as before
  override def name = "dmem0"

  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
      val tl_in = node.bundleIn
      val dmem = new HellaCacheIO
    }

    val tl_in = io.tl_in(0)
    val edge = node.edgesIn(0)

    require(usingDataScratchpad)

    val s_ready :: s_wait :: s_replay :: s_grant :: Nil = Enum(UInt(), 4)
    val state = Reg(init = s_ready)
    when (io.dmem.resp.valid) { state := s_grant }
    when (tl_in.d.fire()) { state := s_ready }
    when (io.dmem.s2_nack) { state := s_replay }
    when (io.dmem.req.fire()) { state := s_wait }

    val acq = Reg(tl_in.a.bits)
    when (io.dmem.resp.valid) { acq.data := io.dmem.resp.bits.data }
    when (tl_in.a.fire()) { acq := tl_in.a.bits }

    val isWrite = acq.opcode === TLMessages.PutFullData || acq.opcode === TLMessages.PutPartialData
    val isRead = !edge.hasData(acq)

    def formCacheReq(acq: TLBundleA) = {
      val req = Wire(new HellaCacheReq)
      req.cmd := MuxLookup(acq.opcode, Wire(M_XRD), Array(
        TLMessages.PutFullData -> M_XWR,
        TLMessages.PutPartialData -> M_XWR,
        TLMessages.ArithmeticData -> MuxLookup(acq.param, Wire(M_XRD), Array(
          TLAtomics.MIN -> M_XA_MIN,
          TLAtomics.MAX -> M_XA_MAX,
          TLAtomics.MINU -> M_XA_MINU,
          TLAtomics.MAXU -> M_XA_MAXU,
          TLAtomics.ADD -> M_XA_ADD)),
        TLMessages.LogicalData -> MuxLookup(acq.param, Wire(M_XRD), Array(
          TLAtomics.XOR -> M_XA_XOR,
          TLAtomics.OR -> M_XA_OR,
          TLAtomics.AND -> M_XA_AND,
          TLAtomics.SWAP -> M_XA_SWAP)),
        TLMessages.Get -> M_XRD))
      // treat all loads as full words, so bytes appear in correct lane
      req.typ := Mux(isRead, log2Ceil(coreDataBytes), acq.size)
      req.addr := Mux(isRead, ~(~acq.address | (coreDataBytes-1)), acq.address)
      req.tag := UInt(0)
      req
    }

    val ready = state === s_ready || tl_in.d.fire()
    io.dmem.req.valid := (tl_in.a.valid && ready) || state === s_replay
    tl_in.a.ready := io.dmem.req.ready && ready
    io.dmem.req.bits := formCacheReq(Mux(state === s_replay, acq, tl_in.a.bits))
    // the TL data is already in the correct byte lane, but the D$
    // expects right-justified store data, so that it can steer the bytes.
    io.dmem.s1_data := new LoadGen(acq.size, Bool(false), acq.address(log2Ceil(coreDataBytes)-1,0), acq.data, Bool(false), coreDataBytes).data
    io.dmem.s1_kill := false
    io.dmem.invalidate_lr := false

    // place AMO data in correct word lane
    val minAMOBytes = 4
    val grantData = Mux(io.dmem.resp.valid, io.dmem.resp.bits.data, acq.data)
    val alignedGrantData = Mux(acq.size <= log2Ceil(minAMOBytes), Fill(coreDataBytes/minAMOBytes, grantData(8*minAMOBytes-1, 0)), grantData)

    tl_in.d.valid := io.dmem.resp.valid || state === s_grant
    tl_in.d.bits := Mux(isWrite,
      edge.AccessAck(acq, UInt(0)),
      edge.AccessAck(acq, UInt(0), UInt(0)))
    tl_in.d.bits.data := alignedGrantData

    // Tie off unused channels
    tl_in.b.valid := Bool(false)
    tl_in.c.ready := Bool(true)
    tl_in.e.ready := Bool(true)
  }
}
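In formCacheReq above, reads are widened to a full data word and the address is rounded down with `~(~addr | (coreDataBytes-1))`, the usual Chisel idiom for clearing the low bits without widening the mask. A plain-Scala illustration of that arithmetic, assuming coreDataBytes = 8:

object WordAlignExample extends App {
  val coreDataBytes = 8                               // assumed value for illustration (64-bit words)
  val addr = 0x80001239L                              // a byte address somewhere inside a word
  val aligned = ~(~addr | (coreDataBytes - 1))        // clears the low log2(8) = 3 bits
  println(f"addr=0x$addr%x -> aligned=0x$aligned%x")  // prints addr=0x80001239 -> aligned=0x80001238
  assert(aligned == (addr & ~(coreDataBytes - 1).toLong))
}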
src/main/scala/rocket/SimpleHellaCacheIF.scala (new file, 136 lines)
@@ -0,0 +1,136 @@
// See LICENSE for license details.

package rocket

import Chisel._
import Chisel.ImplicitConversions._
import cde.Parameters
import util._

/**
 * This module buffers requests made by the SimpleHellaCacheIF in case they
 * are nacked. Nacked requests must be replayed in order, and no other requests
 * must be allowed to go through until the replayed requests are successfully
 * completed.
 */
class SimpleHellaCacheIFReplayQueue(depth: Int)
    (implicit val p: Parameters) extends Module
    with HasL1HellaCacheParameters {
  val io = new Bundle {
    val req = Decoupled(new HellaCacheReq).flip
    val nack = Valid(Bits(width = coreDCacheReqTagBits)).flip
    val resp = Valid(new HellaCacheResp).flip
    val replay = Decoupled(new HellaCacheReq)
  }

  // Registers to store the sent request.
  // When a request is sent the first time,
  // it is stored in one of the reqs registers
  // and the corresponding inflight bit is set.
  // The reqs register will be deallocated once the request is
  // successfully completed.
  val inflight = Reg(init = UInt(0, depth))
  val reqs = Reg(Vec(depth, new HellaCacheReq))

  // The nack queue stores the index of nacked requests (in the reqs vector)
  // in the order that they were nacked. A request is enqueued onto nackq
  // when it is newly nacked (i.e. not a nack for a previous replay).
  // The head of the nack queue will be replayed until it is
  // successfully completed, at which time the request is dequeued.
  // No new requests will be made or other replays attempted until the head
  // of the nackq is successfully completed.
  val nackq = Module(new Queue(UInt(width = log2Up(depth)), depth))
  val replaying = Reg(init = Bool(false))

  val next_inflight_onehot = PriorityEncoderOH(~inflight)
  val next_inflight = OHToUInt(next_inflight_onehot)

  val next_replay = nackq.io.deq.bits
  val next_replay_onehot = UIntToOH(next_replay)
  val next_replay_req = reqs(next_replay)

  // Keep sending the head of the nack queue until it succeeds
  io.replay.valid := nackq.io.deq.valid && !replaying
  io.replay.bits := next_replay_req
  // Don't allow new requests if there are replays waiting
  // or something being nacked.
  io.req.ready := !inflight.andR && !nackq.io.deq.valid && !io.nack.valid

  // Match on the tags to determine the index of nacks or responses
  val nack_onehot = Cat(reqs.map(_.tag === io.nack.bits).reverse) & inflight
  val resp_onehot = Cat(reqs.map(_.tag === io.resp.bits.tag).reverse) & inflight

  val replay_complete = io.resp.valid && replaying && io.resp.bits.tag === next_replay_req.tag
  val nack_head = io.nack.valid && nackq.io.deq.valid && io.nack.bits === next_replay_req.tag

  // Enqueue to the nack queue if there is a nack that is not in response to
  // the previous replay
  nackq.io.enq.valid := io.nack.valid && !nack_head
  nackq.io.enq.bits := OHToUInt(nack_onehot)
  assert(!nackq.io.enq.valid || nackq.io.enq.ready,
    "SimpleHellaCacheIF: ReplayQueue nack queue overflow")

  // Dequeue from the nack queue if the last replay was successfully completed
  nackq.io.deq.ready := replay_complete
  assert(!nackq.io.deq.ready || nackq.io.deq.valid,
    "SimpleHellaCacheIF: ReplayQueue nack queue underflow")

  // Set inflight bit when a request is made.
  // Clear it when it is successfully completed.
  inflight := (inflight | Mux(io.req.fire(), next_inflight_onehot, UInt(0))) &
    ~Mux(io.resp.valid, resp_onehot, UInt(0))

  when (io.req.fire()) {
    reqs(next_inflight) := io.req.bits
  }

  // Only one replay outstanding at a time
  when (io.replay.fire()) { replaying := Bool(true) }
  when (nack_head || replay_complete) { replaying := Bool(false) }
}

// exposes a sane decoupled request interface
class SimpleHellaCacheIF(implicit p: Parameters) extends Module
{
  val io = new Bundle {
    val requestor = new HellaCacheIO().flip
    val cache = new HellaCacheIO
  }

  val replayq = Module(new SimpleHellaCacheIFReplayQueue(2))
  val req_arb = Module(new Arbiter(new HellaCacheReq, 2))

  val req_helper = DecoupledHelper(
    req_arb.io.in(1).ready,
    replayq.io.req.ready,
    io.requestor.req.valid)

  req_arb.io.in(0) <> replayq.io.replay
  req_arb.io.in(1).valid := req_helper.fire(req_arb.io.in(1).ready)
  req_arb.io.in(1).bits := io.requestor.req.bits
  io.requestor.req.ready := req_helper.fire(io.requestor.req.valid)
  replayq.io.req.valid := req_helper.fire(replayq.io.req.ready)
  replayq.io.req.bits := io.requestor.req.bits

  val s0_req_fire = io.cache.req.fire()
  val s1_req_fire = Reg(next = s0_req_fire)
  val s2_req_fire = Reg(next = s1_req_fire)
  val s1_req_tag = Reg(next = io.cache.req.bits.tag)
  val s2_req_tag = Reg(next = s1_req_tag)
  val s2_kill = Reg(next = io.cache.s1_kill)

  io.cache.invalidate_lr := io.requestor.invalidate_lr
  io.cache.req <> req_arb.io.out
  io.cache.s1_kill := io.cache.s2_nack
  io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)

  replayq.io.nack.valid := (io.cache.s2_nack || s2_kill) && s2_req_fire
  replayq.io.nack.bits := s2_req_tag
  replayq.io.resp := io.cache.resp
  io.requestor.resp := io.cache.resp

  assert(!Reg(next = io.cache.req.fire()) ||
    !(io.cache.xcpt.ma.ld || io.cache.xcpt.ma.st ||
      io.cache.xcpt.pf.ld || io.cache.xcpt.pf.st),
    "SimpleHellaCacheIF exception")
}
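The replay queue above keeps its bookkeeping in a one-hot `inflight` bitmap: PriorityEncoderOH over the complement picks a free slot, and the bit is cleared when the matching response returns. A stand-alone sketch of just that allocation idiom (the module and its port names are illustrative, not part of this commit):

import Chisel._

class InflightTracker(depth: Int) extends Module {
  val io = new Bundle {
    val alloc = Bool(INPUT)                                  // a new request is accepted this cycle
    val freeSlot = UInt(OUTPUT, log2Up(depth))               // slot chosen for that request
    val complete = Valid(UInt(width = log2Up(depth))).flip   // slot whose request just finished
    val full = Bool(OUTPUT)
  }
  val inflight = Reg(init = UInt(0, depth))
  val allocOH = PriorityEncoderOH(~inflight)                 // lowest free slot, one-hot
  io.freeSlot := OHToUInt(allocOH)
  io.full := inflight.andR
  // set the bit on allocation, clear it on completion, mirroring the update in the queue above
  inflight := (inflight | Mux(io.alloc, allocOH, UInt(0))) &
              ~Mux(io.complete.valid, UIntToOH(io.complete.bits, depth), UInt(0))
}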
@@ -3,15 +3,15 @@
package rocket

import Chisel._
import junctions._
import Chisel.ImplicitConversions._
import cde.Parameters
import diplomacy._
import uncore.tilelink2._
import uncore.constants._
import uncore.agents._
import uncore.constants._
import uncore.tilelink2._
import uncore.util._
import util._
import TLMessages._
import Chisel.ImplicitConversions._
import config._

class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
@@ -40,46 +40,17 @@ class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
  }
}

class DCache(maxUncachedInFlight: Int = 2)(implicit val p: Parameters) extends LazyModule with HasL1HellaCacheParameters {
class DCache(cfg: DCacheConfig)(implicit p: Parameters) extends HellaCache(cfg)(p) {
  override lazy val module = new DCacheModule(this)
}

  val node = TLClientNode(TLClientParameters(
    sourceId = IdRange(0, maxUncachedInFlight),
    supportsProbe = TransferSizes(cacheBlockBytes)))
class DCacheModule(outer: DCache)(implicit p: Parameters) extends HellaCacheModule(outer)(p) {

  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
      val cpu = (new HellaCacheIO).flip
      val ptw = new TLBPTWIO()
      val mem = node.bundleOut
    }
  val maxUncachedInFlight = cfg.nMMIOs

    val edge = node.edgesOut(0)
    val tl_out = io.mem(0)

    /* TODO
    edge.manager.managers.foreach { m =>
      // If a slave supports read at all, it must support all TL Legacy requires
      if (m.supportsGet) {
        require (m.supportsGet.contains(TransferSizes(1, tlDataBytes)))
        require (m.supportsGet.contains(TransferSizes(tlDataBeats * tlDataBytes)))
      }
      // Likewise, any put support must mean full put support
      if (m.supportsPutPartial) {
        require (m.supportsPutPartial.contains(TransferSizes(1, tlDataBytes)))
        require (m.supportsPutPartial.contains(TransferSizes(tlDataBeats * tlDataBytes)))
      }
      // Any atomic support => must support 32-bit size
      if (m.supportsArithmetic) { require (m.supportsArithmetic.contains(TransferSizes(4))) }
      if (m.supportsLogical) { require (m.supportsLogical .contains(TransferSizes(4))) }
      // We straight-up require Acquire support; this is a cache, after all
      require (edge.manager.anySupportsAcquire)
    }
    */
  require(rowBits == encRowBits) // no ECC
  require(refillCyclesPerBeat == 1)
  require(rowBits >= coreDataBits)

  val grantackq = Module(new Queue(tl_out.e.bits,1))
  val grantackq = Module(new Queue(tl_out.e.bits,1)) // TODO don't need this in scratchpad mode

  // tags
  val replacer = p(Replacer)()
@@ -530,100 +501,4 @@ class DCache(maxUncachedInFlight: Int = 2)(implicit val p: Parameters) extends LazyModule with HasL1HellaCacheParameters {
      flushing := false
    }
  }
}
}

class ScratchpadSlavePort(implicit val p: Parameters) extends LazyModule with HasCoreParameters {
  val node = TLManagerNode(TLManagerPortParameters(
    Seq(TLManagerParameters(
      address = List(AddressSet(0x80000000L, BigInt(p(DataScratchpadSize)-1))),
      regionType = RegionType.UNCACHED,
      executable = true,
      supportsArithmetic = if (p(UseAtomics)) TransferSizes(1, coreDataBytes) else TransferSizes.none,
      supportsLogical = if (p(UseAtomics)) TransferSizes(1, coreDataBytes) else TransferSizes.none,
      supportsPutPartial = TransferSizes(1, coreDataBytes),
      supportsPutFull = TransferSizes(1, coreDataBytes),
      supportsGet = TransferSizes(1, coreDataBytes),
      fifoId = Some(0))), // requests handled in FIFO order
    beatBytes = coreDataBytes,
    minLatency = 1))

  // Make sure this ends up with the same name as before
  override def name = "dmem0"

  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
      val tl_in = node.bundleIn
      val dmem = new HellaCacheIO
    }

    val tl_in = io.tl_in(0)
    val edge = node.edgesIn(0)

    require(usingDataScratchpad)

    val s_ready :: s_wait :: s_replay :: s_grant :: Nil = Enum(UInt(), 4)
    val state = Reg(init = s_ready)
    when (io.dmem.resp.valid) { state := s_grant }
    when (tl_in.d.fire()) { state := s_ready }
    when (io.dmem.s2_nack) { state := s_replay }
    when (io.dmem.req.fire()) { state := s_wait }

    val acq = Reg(tl_in.a.bits)
    when (io.dmem.resp.valid) { acq.data := io.dmem.resp.bits.data }
    when (tl_in.a.fire()) { acq := tl_in.a.bits }

    val isWrite = acq.opcode === TLMessages.PutFullData || acq.opcode === TLMessages.PutPartialData
    val isRead = !edge.hasData(acq)

    def formCacheReq(acq: TLBundleA) = {
      val req = Wire(new HellaCacheReq)
      req.cmd := MuxLookup(acq.opcode, Wire(M_XRD), Array(
        TLMessages.PutFullData -> M_XWR,
        TLMessages.PutPartialData -> M_XWR,
        TLMessages.ArithmeticData -> MuxLookup(acq.param, Wire(M_XRD), Array(
          TLAtomics.MIN -> M_XA_MIN,
          TLAtomics.MAX -> M_XA_MAX,
          TLAtomics.MINU -> M_XA_MINU,
          TLAtomics.MAXU -> M_XA_MAXU,
          TLAtomics.ADD -> M_XA_ADD)),
        TLMessages.LogicalData -> MuxLookup(acq.param, Wire(M_XRD), Array(
          TLAtomics.XOR -> M_XA_XOR,
          TLAtomics.OR -> M_XA_OR,
          TLAtomics.AND -> M_XA_AND,
          TLAtomics.SWAP -> M_XA_SWAP)),
        TLMessages.Get -> M_XRD))
      // treat all loads as full words, so bytes appear in correct lane
      req.typ := Mux(isRead, log2Ceil(coreDataBytes), acq.size)
      req.addr := Mux(isRead, ~(~acq.address | (coreDataBytes-1)), acq.address)
      req.tag := UInt(0)
      req
    }

    val ready = state === s_ready || tl_in.d.fire()
    io.dmem.req.valid := (tl_in.a.valid && ready) || state === s_replay
    tl_in.a.ready := io.dmem.req.ready && ready
    io.dmem.req.bits := formCacheReq(Mux(state === s_replay, acq, tl_in.a.bits))
    // the TL data is already in the correct byte lane, but the D$
    // expects right-justified store data, so that it can steer the bytes.
    io.dmem.s1_data := new LoadGen(acq.size, Bool(false), acq.address(log2Ceil(coreDataBytes)-1,0), acq.data, Bool(false), coreDataBytes).data
    io.dmem.s1_kill := false
    io.dmem.invalidate_lr := false

    // place AMO data in correct word lane
    val minAMOBytes = 4
    val grantData = Mux(io.dmem.resp.valid, io.dmem.resp.bits.data, acq.data)
    val alignedGrantData = Mux(acq.size <= log2Ceil(minAMOBytes), Fill(coreDataBytes/minAMOBytes, grantData(8*minAMOBytes-1, 0)), grantData)

    tl_in.d.valid := io.dmem.resp.valid || state === s_grant
    tl_in.d.bits := Mux(isWrite,
      edge.AccessAck(acq, UInt(0)),
      edge.AccessAck(acq, UInt(0), UInt(0)))
    tl_in.d.bits.data := alignedGrantData

    // Tie off unused channels
    tl_in.b.valid := Bool(false)
    tl_in.c.ready := Bool(true)
    tl_in.e.ready := Bool(true)
  }
}
File diff suppressed because it is too large
@@ -13,6 +13,14 @@ object TLArbiter
  val lowestIndexFirst: Policy = (valids, granted) =>
    valids.scanLeft(Bool(true))(_ && !_).init

  def lowestFromSeq[T <: TLChannel](edge: TLEdge, sink: DecoupledIO[T], sources: Seq[DecoupledIO[T]]) {
    apply(lowestIndexFirst)(sink, sources.map(s => (edge.numBeats1(s.bits), s)):_*)
  }

  def lowest[T <: TLChannel](edge: TLEdge, sink: DecoupledIO[T], sources: DecoupledIO[T]*) {
    apply(lowestIndexFirst)(sink, sources.toList.map(s => (edge.numBeats1(s.bits), s)):_*)
  }

  def apply[T <: Data](policy: Policy)(sink: DecoupledIO[T], sources: (UInt, DecoupledIO[T])*) {
    if (sources.isEmpty) {
      sink.valid := Bool(false)
@@ -136,11 +136,9 @@ class TLBroadcast(lineBytes: Int, numTrackers: Int = 4, bufferless: Boolean = false)
    putfull.bits := edgeOut.Put(Cat(put_what, in.c.bits.source), in.c.bits.address, in.c.bits.size, in.c.bits.data)._2

    // Combine ReleaseAck or the modified D
    TLArbiter(TLArbiter.lowestIndexFirst)(in.d, (UInt(0), releaseack), (edgeOut.numBeats1(d_normal.bits), d_normal))
    TLArbiter.lowest(edgeOut, in.d, releaseack, d_normal)
    // Combine the PutFull with the trackers
    TLArbiter(TLArbiter.lowestIndexFirst)(out.a,
      ((edgeOut.numBeats1(putfull.bits), putfull) +:
       trackers.map { t => (edgeOut.numBeats1(t.out_a.bits), t.out_a) }):_*)
    TLArbiter.lowestFromSeq(edgeOut, out.a, putfull +: trackers.map(_.out_a))

    // The Probe FSM walks all caches and probes them
    val probe_todo = RegInit(UInt(0, width = max(1, caches.size)))
@@ -191,27 +191,32 @@ class TLEdge(

  def first(bits: TLChannel, fire: Bool): Bool = firstlastHelper(bits, fire)._1
  def first(x: DecoupledIO[TLChannel]): Bool = first(x.bits, x.fire())
  def first(x: ValidIO[TLChannel]): Bool = first(x.bits, x.valid)

  def last(bits: TLChannel, fire: Bool): Bool = firstlastHelper(bits, fire)._2
  def last(x: DecoupledIO[TLChannel]): Bool = last(x.bits, x.fire())
  def last(x: ValidIO[TLChannel]): Bool = last(x.bits, x.valid)

  def firstlast(bits: TLChannel, fire: Bool): (Bool, Bool, Bool) = {
    val r = firstlastHelper(bits, fire)
    (r._1, r._2, r._3)
  }
  def firstlast(x: DecoupledIO[TLChannel]): (Bool, Bool, Bool) = firstlast(x.bits, x.fire())
  def firstlast(x: ValidIO[TLChannel]): (Bool, Bool, Bool) = firstlast(x.bits, x.valid)

  def count(bits: TLChannel, fire: Bool): (Bool, Bool, Bool, UInt) = {
    val r = firstlastHelper(bits, fire)
    (r._1, r._2, r._3, r._4)
  }
  def count(x: DecoupledIO[TLChannel]): (Bool, Bool, Bool, UInt) = count(x.bits, x.fire())
  def count(x: ValidIO[TLChannel]): (Bool, Bool, Bool, UInt) = count(x.bits, x.valid)

  def addr_inc(bits: TLChannel, fire: Bool): (Bool, Bool, Bool, UInt) = {
    val r = firstlastHelper(bits, fire)
    (r._1, r._2, r._3, r._4 << log2Ceil(manager.beatBytes))
  }
  def addr_inc(x: DecoupledIO[TLChannel]): (Bool, Bool, Bool, UInt) = addr_inc(x.bits, x.fire())
  def addr_inc(x: ValidIO[TLChannel]): (Bool, Bool, Bool, UInt) = addr_inc(x.bits, x.valid)
}

class TLEdgeOut(
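The new TLEdge overloads above accept a whole channel instead of a (bits, fire) pair. A hedged usage sketch; `beatInfo` is an illustrative helper, and `edge` and `a` are assumed to come from a surrounding TileLink2 module rather than from this commit:

import Chisel._
import uncore.tilelink2._

object TLEdgeUsageSketch {
  def beatInfo(edge: TLEdge, a: DecoupledIO[TLBundleA]) = {
    val a_first = edge.first(a)                  // same as edge.first(a.bits, a.fire())
    val a_last  = edge.last(a)                   // same as edge.last(a.bits, a.fire())
    val (_, _, a_done, a_count) = edge.count(a)  // beat counter for the in-flight transaction
    (a_first, a_last, a_done, a_count)
  }
}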