2016-09-21 01:49:57 +02:00
|
|
|
// See LICENSE for license details.
|
|
|
|
|
|
|
|
package uncore.tilelink2
|
|
|
|
|
|
|
|
import Chisel._
|
|
|
|
import chisel3.internal.sourceinfo.SourceInfo
|
2016-10-04 00:17:36 +02:00
|
|
|
import diplomacy._
|
2016-09-21 01:49:57 +02:00
|
|
|
import scala.math.{min,max}
|
|
|
|
|
|
|
|
// Ensures that all downstream RW managers support Atomic operations.
|
|
|
|
// If !passthrough, intercept all Atomics. Otherwise, only intercept those unsupported downstream.
|
|
|
|
/** TileLink adapter that emulates Atomic operations on top of Get + PutFull.
  *
  * An Atomic arriving on A that the addressed manager cannot handle is rewritten into a Get
  * and remembered in a small CAM. When the matching AccessAckData returns on D it is swallowed
  * and its data captured; the adapter then computes the AMO result and issues a Put on A.
  * The AccessAck of that Put is finally turned into an AccessAckData carrying the captured
  * (pre-AMO) data and forwarded to the client.
  *
  * @param logical     add support for Logical atomics (AND/OR/XOR/SWAP)
  * @param arithmetic  add support for Arithmetic atomics (ADD/MIN/MAX and unsigned variants)
  * @param concurrency upper bound on the number of CAM entries (inflight emulated atomics);
  *                    must be at least 1
  * @param passthrough if true, Atomics the addressed manager already supports are forwarded
  *                    unchanged; if false, every Atomic is intercepted
  */
class TLAtomicAutomata(logical: Boolean = true, arithmetic: Boolean = true, concurrency: Int = 1, passthrough: Boolean = true) extends LazyModule
{
  require (concurrency >= 1, s"TLAtomicAutomata requires concurrency >= 1, but got $concurrency")

  val node = TLAdapterNode(
    clientFn  = { case Seq(cp) =>
      require (!cp.unsafeAtomics, "TLAtomicAutomata requires a client port that does not already have unsafeAtomics set")
      cp.copy(unsafeAtomics = true)
    },
    managerFn = { case Seq(mp) => mp.copy(managers = mp.managers.map { m =>
      // The automata only emulates atomics of up to one beat
      val ourSupport = TransferSizes(1, mp.beatBytes)
      // With passthrough, keep whatever larger sizes the manager natively offers
      def widen(x: TransferSizes) = if (passthrough && x.min <= 2*mp.beatBytes) TransferSizes(1, max(mp.beatBytes, x.max)) else ourSupport
      val canDoit = m.supportsPutFull.contains(ourSupport) && m.supportsGet.contains(ourSupport)
      // Blow up if there are devices to which we cannot add Atomics, because their R|W are too inflexible
      require (!m.supportsPutFull || !m.supportsGet || canDoit,
        s"TLAtomicAutomata needs PutFull and Get to cover $ourSupport, but found PutFull(${m.supportsPutFull}) and Get(${m.supportsGet})")
      m.copy(
        supportsArithmetic = if (!arithmetic || !canDoit) m.supportsArithmetic else widen(m.supportsArithmetic),
        supportsLogical    = if (!logical    || !canDoit) m.supportsLogical    else widen(m.supportsLogical))
    })})

  lazy val module = new LazyModuleImp(this) {
    val io = new Bundle {
      val in  = node.bundleIn
      val out = node.bundleOut
    }

    val in  = io.in(0)
    val out = io.out(0)
    val edgeIn  = node.edgesIn(0)
    val edgeOut = node.edgesOut(0)
    val managers  = edgeOut.manager.managers
    val beatBytes = edgeOut.manager.beatBytes

    // To which managers are we adding atomic support?
    val ourSupport = TransferSizes(1, beatBytes)
    val managersNeedingHelp = managers.filter { m =>
      m.supportsPutFull.contains(ourSupport) &&
      m.supportsGet.contains(ourSupport) &&
      ((logical    && !m.supportsLogical   .contains(ourSupport)) ||
       (arithmetic && !m.supportsArithmetic.contains(ourSupport)) ||
       !passthrough) // we will do atomics for everyone we can
    }
    // We cannot add atomics to a non-FIFO manager
    managersNeedingHelp foreach { m =>
      require (m.fifoId.isDefined, "TLAtomicAutomata cannot add atomics to a manager without a fifoId")
    }
    // We need to preserve FIFO semantics across FIFO domains, not managers
    // Suppose you have Put(42) Atomic(+1) both inflight; valid results: 42 or 43
    // If we allow Put(42) Get() Put(+1) concurrent; valid results: 42 43 OR undef
    // Making non-FIFO work requires waiting for all Acks to come back (=> use FIFOFixer)
    val domainsNeedingHelp = managersNeedingHelp.flatMap(_.fifoId).distinct
    // Don't overprovision the CAM
    val camSize = min(domainsNeedingHelp.size, concurrency)
    // Compact the fifoIds to only those we care about
    // (managers outside every helped domain, or without a fifoId, map to index 0)
    val camFifoIds = managers.map(m => UInt(m.fifoId.map(id => max(0, domainsNeedingHelp.indexOf(id))).getOrElse(0)))

    // CAM entry state machine
    val FREE = UInt(0) // unused                   waiting on Atomic from A
    val GET  = UInt(3) // Get sent down A          waiting on AccessDataAck from D
    val AMO  = UInt(2) // AccessDataAck sent up D  waiting for A availability
    val ACK  = UInt(1) // Put sent down A          waiting for PutAck from D

    // Does the manager picked by `select` natively support a transfer of 2^lgSize bytes?
    // Without passthrough the answer is always "no", so every Atomic is intercepted.
    // If all managers agree, the select mux is skipped.
    def helper(select: Seq[Bool], x: Seq[TransferSizes], lgSize: UInt) =
      if (!passthrough) Bool(false) else
      if (x.forall(_ == x.head)) x.head.containsLg(lgSize) else
      Mux1H(select, x.map(_.containsLg(lgSize)))

    // Do we need to do anything at all?
    if (camSize > 0) {
      class CAM_S extends Bundle {
        val state = UInt(width = 2)
      }
      class CAM_A extends Bundle {
        val bits    = new TLBundleA(out.a.bits.params)
        val fifoId  = UInt(width = log2Up(domainsNeedingHelp.size))
        val lut     = UInt(width = 4) // truth table of the logical op, indexed by Cat(a bit, d bit)
      }
      class CAM_D extends Bundle {
        val data = UInt(width = out.a.bits.params.dataBits)
      }

      // Only the state is reset; the A and D payload registers are written before they are read
      val initval = Wire(new CAM_S)
      initval.state := FREE
      val cam_s = RegInit(Vec.fill(camSize)(initval))
      val cam_a = Reg(Vec(camSize, new CAM_A))
      val cam_d = Reg(Vec(camSize, new CAM_D))

      val cam_free   = cam_s.map(_.state === FREE)
      val cam_amo    = cam_s.map(_.state === AMO)
      val cam_abusy  = cam_s.map(e => e.state === GET || e.state === AMO) // A is blocked
      val cam_dmatch = cam_s.map(e => e.state === GET || e.state === ACK) // D should inspect these entries

      // Can the manager already handle this message?
      val a_size = edgeIn.size(in.a.bits)
      val a_select = edgeOut.manager.findFast(edgeIn.address(in.a.bits))
      val a_canLogical    = helper(a_select, managers.map(_.supportsLogical),    a_size)
      val a_canArithmetic = helper(a_select, managers.map(_.supportsArithmetic), a_size)
      val a_isLogical    = in.a.bits.opcode === TLMessages.LogicalData
      val a_isArithmetic = in.a.bits.opcode === TLMessages.ArithmeticData
      // Non-atomic messages are always "supported" and pass straight through
      val a_isSupported = Mux(a_isLogical, a_canLogical, Mux(a_isArithmetic, a_canArithmetic, Bool(true)))

      // Must we do a Put?
      val a_cam_any_put = cam_amo.reduce(_ || _)
      val a_cam_por_put = cam_amo.scanLeft(Bool(false))(_||_).init
      // One-hot select of the lowest-index entry in the AMO state
      val a_cam_sel_put = (cam_amo zip a_cam_por_put) map { case (a, b) => a && !b }
      val a_cam_a = PriorityMux(cam_amo, cam_a)
      val a_cam_d = PriorityMux(cam_amo, cam_d)
      val a_a = a_cam_a.bits.data // operand supplied by the client
      val a_d = a_cam_d.data      // value read back by the Get

      // Does the A request conflict with an inflight AMO?
      val a_fifoId = Mux1H(a_select, camFifoIds)
      val a_cam_busy = (cam_abusy zip cam_a.map(_.fifoId === a_fifoId)) map { case (a,b) => a&&b } reduce (_||_)

      // (Where) are we allocating in the CAM?
      val a_cam_any_free = cam_free.reduce(_ || _)
      val a_cam_por_free = cam_free.scanLeft(Bool(false))(_||_).init
      // One-hot select of the lowest-index FREE entry
      val a_cam_sel_free = (cam_free zip a_cam_por_free) map { case (a,b) => a && !b }

      // Logical AMO
      // Each result bit is a 4-entry table lookup addressed by {client bit, memory bit}
      val indexes = Seq.tabulate(beatBytes*8) { i => Cat(a_a(i,i), a_d(i,i)) }
      val logic_out = Cat(indexes.map(x => a_cam_a.lut(x).asUInt).reverse)

      // Arithmetic AMO
      val unsigned = a_cam_a.bits.param(1)
      val take_max = a_cam_a.bits.param(0)
      val adder = a_cam_a.bits.param(2)
      val mask = a_cam_a.bits.mask
      // Selects the most significant active byte: set in mask, but the next byte up is not
      val signSel = ~(~mask | (mask >> 1))
      val signbits_a = Cat(Seq.tabulate(beatBytes) { i => a_a(8*i+7,8*i+7) } .reverse)
      val signbits_d = Cat(Seq.tabulate(beatBytes) { i => a_d(8*i+7,8*i+7) } .reverse)
      // Move the selected sign bit into the first byte position it will extend
      val signbit_a = ((signbits_a & signSel) << 1)(beatBytes-1, 0)
      val signbit_d = ((signbits_d & signSel) << 1)(beatBytes-1, 0)
      val signext_a = FillInterleaved(8, leftOR(signbit_a))
      val signext_d = FillInterleaved(8, leftOR(signbit_d))
      // NOTE: sign-extension does not change the relative ordering in EITHER unsigned or signed arithmetic
      val wide_mask = FillInterleaved(8, mask)
      val a_a_ext = (a_a & wide_mask) | signext_a
      val a_d_ext = (a_d & wide_mask) | signext_d
      // For comparisons the memory operand is bit-inverted before the shared add
      val a_d_inv = Mux(adder, a_d_ext, ~a_d_ext)
      val adder_out = a_a_ext + a_d_inv
      val h = 8*beatBytes-1 // now sign-extended; use biggest bit
      val a_bigger_uneq = unsigned === a_a_ext(h) // result if high bits are unequal
      val a_bigger = Mux(a_a_ext(h) === a_d_ext(h), !adder_out(h), a_bigger_uneq)
      val pick_a = take_max === a_bigger
      val arith_out = Mux(adder, adder_out, Mux(pick_a, a_a, a_d))

      // AMO result data
      // When only one flavour is enabled the opcode mux is elided at elaboration time
      val amo_data =
        if (!logical)    arith_out else
        if (!arithmetic) logic_out else
        Mux(a_cam_a.bits.opcode(0), logic_out, arith_out)

      // Potentially mutate the message from inner
      val source_i = Wire(in.a)
      // Stall while an emulated AMO in the same FIFO domain still needs A,
      // or when an unsupported Atomic finds no free CAM entry
      val a_allow = !a_cam_busy && (a_isSupported || a_cam_any_free)
      in.a.ready := source_i.ready && a_allow
      source_i.valid := in.a.valid && a_allow
      source_i.bits  := in.a.bits
      when (!a_isSupported) { // minimal mux difference
        source_i.bits.opcode := TLMessages.Get
        source_i.bits.param  := UInt(0)
      }

      // Potentially take the message from the CAM
      val source_c = Wire(in.a)
      source_c.valid := a_cam_any_put
      source_c.bits := edgeOut.Put(a_cam_a.bits.source, edgeIn.address(a_cam_a.bits), a_cam_a.bits.size, amo_data)._2

      // Finishing an AMO from the CAM has highest priority
      TLArbiter(TLArbiter.lowestIndexFirst)(out.a, (UInt(1), source_c), (edgeOut.numBeats(in.a.bits), source_i))

      // Capture the A state into the CAM
      when (source_i.fire() && !a_isSupported) {
        (a_cam_sel_free zip cam_a) foreach { case (en, r) =>
          when (en) {
            r.fifoId := a_fifoId
            r.bits   := in.a.bits
            // Truth tables, bit index = Cat(client bit, memory bit)
            r.lut    := MuxLookup(in.a.bits.param(1, 0), UInt(0, width = 4), Array(
              TLAtomics.AND  -> UInt(0x8),
              TLAtomics.OR   -> UInt(0xe),
              TLAtomics.XOR  -> UInt(0x6),
              TLAtomics.SWAP -> UInt(0xc)))
          }
        }
        (a_cam_sel_free zip cam_s) foreach { case (en, r) =>
          when (en) {
            r.state := GET
          }
        }
      }

      // Advance the put state
      when (source_c.fire()) {
        (a_cam_sel_put zip cam_s) foreach { case (en, r) =>
          when (en) {
            r.state := ACK
          }
        }
      }

      // We need to deal with a potential D response in the same cycle as the A request
      val d_cam_sel_raw = cam_a.map(_.bits.source === in.d.bits.source)
      val d_cam_sel_match = (d_cam_sel_raw zip cam_dmatch) map { case (a,b) => a&&b }
      val d_cam_data = Mux1H(d_cam_sel_match, cam_d.map(_.data))
      // Only possible when the manager can respond with zero latency; the CAM entry
      // has not been written yet, so target the entry being allocated this cycle
      val d_cam_sel_bypass = if (edgeOut.manager.minLatency > 0) Bool(false) else
                             out.d.bits.source === in.a.bits.source && in.a.valid && out.d.valid && !a_isSupported
      val d_cam_sel = (a_cam_sel_free zip d_cam_sel_match) map { case (a,d) => Mux(d_cam_sel_bypass, a, d) }
      val d_cam_sel_any = d_cam_sel_bypass || d_cam_sel_match.reduce(_ || _)
      val d_ackd = out.d.bits.opcode === TLMessages.AccessAckData
      val d_ack  = out.d.bits.opcode === TLMessages.AccessAck

      when (out.d.fire()) {
        (d_cam_sel zip cam_d) foreach { case (en, r) =>
          when (en && d_ackd) {
            r.data := out.d.bits.data
          }
        }
        (d_cam_sel zip cam_s) foreach { case (en, r) =>
          when (en) {
            // Note: it is important that this comes AFTER the := GET, so we can go FREE=>GET=>AMO in one cycle
            r.state := Mux(d_ackd, AMO, FREE)
          }
        }
      }

      // The Get response of an emulated AMO is consumed here and never shown to the client
      val d_drop = d_ackd && d_cam_sel_any
      // The Put response is turned into the AMO response carrying the captured old data
      val d_replace = d_ack && d_cam_sel_match.reduce(_ || _)

      in.d.valid := out.d.valid && !d_drop
      out.d.ready := in.d.ready || d_drop

      in.d.bits := out.d.bits
      when (d_replace) { // minimal muxes
        in.d.bits.opcode := TLMessages.AccessAckData
        in.d.bits.data   := d_cam_data
      }
    } else {
      // Nothing to emulate: A and D are wired straight through
      out.a.valid := in.a.valid
      in.a.ready := out.a.ready
      out.a.bits := in.a.bits

      in.d.valid := out.d.valid
      out.d.ready := in.d.ready
      in.d.bits := out.d.bits
    }

    // B, C and E are never modified: forward them when probing is possible, otherwise tie them off
    if (edgeOut.manager.anySupportAcquire && edgeIn.client.anySupportProbe) {
      in.b.valid := out.b.valid
      out.b.ready := in.b.ready
      in.b.bits := out.b.bits

      out.c.valid := in.c.valid
      in.c.ready := out.c.ready
      out.c.bits := in.c.bits

      out.e.valid := in.e.valid
      in.e.ready := out.e.ready
      out.e.bits := in.e.bits
    } else {
      in.b.valid := Bool(false)
      in.c.ready := Bool(true)
      in.e.ready := Bool(true)
      out.b.ready := Bool(true)
      out.c.valid := Bool(false)
      out.e.valid := Bool(false)
    }
  }
}
|
|
|
|
|
|
|
|
object TLAtomicAutomata
{
  /** Builds a [[TLAtomicAutomata]], attaches it downstream of `x`, and returns its node.
    *
    * The configuration arguments are forwarded unchanged to the class constructor.
    * Typical use: `y.node := TLAtomicAutomata()(x.node)`.
    */
  def apply(logical: Boolean = true, arithmetic: Boolean = true, concurrency: Int = 1, passthrough: Boolean = true)(x: TLOutwardNode)(implicit sourceInfo: SourceInfo): TLOutwardNode = {
    val adapter = LazyModule(new TLAtomicAutomata(
      logical     = logical,
      arithmetic  = arithmetic,
      concurrency = concurrency,
      passthrough = passthrough))
    // Drive the adapter from the supplied source node, then expose the adapter's own node
    adapter.node := x
    adapter.node
  }
}
|
2016-09-29 00:11:05 +02:00
|
|
|
|
|
|
|
/** Synthesizable unit tests */
|
|
|
|
import unittest._
|
|
|
|
|
|
|
|
//TODO ensure handler will pass through operations to clients that can handle them themselves
|
|
|
|
|
|
|
|
/** Unit-test harness: fuzzer -> RAM model -> TLAtomicAutomata -> TLFragmenter -> TLRAM.
  *
  * The test finishes when the fuzzer reports that it is finished.
  */
class TLRAMAtomicAutomata() extends LazyModule {
  val fuzz  = LazyModule(new TLFuzzer(5000))
  val model = LazyModule(new TLRAMModel)
  val ram   = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff)))

  // The fuzzer drives the model, which checks the traffic it forwards
  model.node := fuzz.node

  // Atomics are emulated first, then the result is fragmented in front of the RAM
  val atomicsNode = TLAtomicAutomata()(model.node)
  ram.node := TLFragmenter(4, 256)(atomicsNode)

  lazy val module = new LazyModuleImp(this) with HasUnitTestIO {
    io.finished := fuzz.module.io.finished
  }
}
|
|
|
|
|
|
|
|
/** Unit test wrapper that instantiates the [[TLRAMAtomicAutomata]] harness
  * and forwards its `finished` flag; the timeout is 500000.
  */
class TLRAMAtomicAutomataTest extends UnitTest(timeout = 500000) {
  private val harness = LazyModule(new TLRAMAtomicAutomata)
  io.finished := Module(harness.module).io.finished
}
|