diff --git a/src/main/scala/uncore/axi4/Bundles.scala b/src/main/scala/uncore/axi4/Bundles.scala
new file mode 100644
index 00000000..89fdbc9f
--- /dev/null
+++ b/src/main/scala/uncore/axi4/Bundles.scala
@@ -0,0 +1,68 @@
+// See LICENSE for license details.
+
+package uncore.axi4
+
+import Chisel._
+import chisel3.util.Irrevocable
+import util.GenericParameterizedBundle
+// Common base of every AXI4 channel bundle in this file; each one is parameterized by AXI4BundleParameters.
+abstract class AXI4BundleBase(params: AXI4BundleParameters) extends GenericParameterizedBundle(params)
+// Fields shared by the two address channels (AW and AR); every width is taken from params.
+abstract class AXI4BundleA(params: AXI4BundleParameters) extends AXI4BundleBase(params)
+{
+  val id     = UInt(width = params.idBits)
+  val addr   = UInt(width = params.addrBits)
+  val len    = UInt(width = params.lenBits)   // number of beats - 1
+  val size   = UInt(width = params.sizeBits)  // bytes in beat = 2^size
+  val burst  = UInt(width = params.burstBits) // encodings: AXI4Parameters.BURST_*
+  val lock   = UInt(width = params.lockBits)
+  val cache  = UInt(width = params.cacheBits) // flag bits: AXI4Parameters.CACHE_*
+  val prot   = UInt(width = params.protBits)  // flag bits: AXI4Parameters.PROT_*
+  val qos    = UInt(width = params.qosBits)   // 0=no QoS, bigger = higher priority
+  // val region = UInt(width = 4) // optional
+}
+
+// A non-standard bundle that can be both AR and AW; wen tells them apart
+class AXI4BundleARW(params: AXI4BundleParameters) extends AXI4BundleA(params)
+{
+  val wen = Bool() // TLToAXI4 forwards the request on AW when wen is set and on AR otherwise
+}
+// The standard write-address and read-address channels add no fields of their own.
+class AXI4BundleAW(params: AXI4BundleParameters) extends AXI4BundleA(params)
+class AXI4BundleAR(params: AXI4BundleParameters) extends AXI4BundleA(params)
+// Write-data channel.
+class AXI4BundleW(params: AXI4BundleParameters) extends AXI4BundleBase(params)
+{
+  // id ... removed in AXI4
+  val data = UInt(width = params.dataBits)
+  val strb = UInt(width = params.dataBits/8) // one strobe bit per byte of data
+  val last = Bool()
+}
+// Read-data (response) channel.
+class AXI4BundleR(params: AXI4BundleParameters) extends AXI4BundleBase(params)
+{
+  val id   = UInt(width = params.idBits)
+  val data = UInt(width = params.dataBits)
+  val resp = UInt(width = params.respBits) // encodings: AXI4Parameters.RESP_*
+  val last = Bool()
+}
+// Write-response channel.
+class AXI4BundleB(params: AXI4BundleParameters) extends AXI4BundleBase(params)
+{
+  val id   = UInt(width = params.idBits)
+  val resp = UInt(width = params.respBits) // encodings: AXI4Parameters.RESP_*
+}
+// The five AXI4 channels; b and r are flipped, i.e. they travel opposite to aw, w and ar.
+class AXI4Bundle(params: AXI4BundleParameters) extends AXI4BundleBase(params)
+{
+  val aw = Irrevocable(new AXI4BundleAW(params))
+  val w  = Irrevocable(new AXI4BundleW (params))
+  val b  = Irrevocable(new AXI4BundleB (params)).flip
+  val ar = Irrevocable(new AXI4BundleAR(params))
+  val r  = Irrevocable(new AXI4BundleR (params)).flip
+}
+// Factory so callers can write AXI4Bundle(params) without `new`.
+object AXI4Bundle
+{
+  def apply(params: AXI4BundleParameters) = new AXI4Bundle(params)
+}
diff --git a/src/main/scala/uncore/axi4/Nodes.scala b/src/main/scala/uncore/axi4/Nodes.scala
new file mode 100644
index 00000000..cafac4f1
--- /dev/null
+++ b/src/main/scala/uncore/axi4/Nodes.scala
@@ -0,0 +1,47 @@
+// See LICENSE for license details.
+
+package uncore.axi4
+
+import Chisel._
+import chisel3.internal.sourceinfo.SourceInfo
+import diplomacy._
+// NodeImp for AXI4: builds edge parameters and bundles from master/slave port parameters and wires the two sides together.
+object AXI4Imp extends NodeImp[AXI4MasterPortParameters, AXI4SlavePortParameters, AXI4EdgeParameters, AXI4EdgeParameters, AXI4Bundle]
+{
+  def edgeO(pd: AXI4MasterPortParameters, pu: AXI4SlavePortParameters): AXI4EdgeParameters = AXI4EdgeParameters(pd, pu)
+  def edgeI(pd: AXI4MasterPortParameters, pu: AXI4SlavePortParameters): AXI4EdgeParameters = AXI4EdgeParameters(pd, pu)
+  def bundleO(eo: Seq[AXI4EdgeParameters]): Vec[AXI4Bundle] = {
+    require (!eo.isEmpty)
+    Vec(eo.size, AXI4Bundle(eo.map(_.bundle).reduce(_.union(_)))) // all ports share the union (max of each width) of the edge bundles
+  }
+  def bundleI(ei: Seq[AXI4EdgeParameters]): Vec[AXI4Bundle] = {
+    require (!ei.isEmpty)
+    Vec(ei.size, AXI4Bundle(ei.map(_.bundle).reduce(_.union(_)))).flip // same as bundleO, but flipped for the inward side
+  }
+
+  def colour = "#00ccff" // bluish
+  def connect(bo: => AXI4Bundle, bi: => AXI4Bundle, ei: => AXI4EdgeParameters)(implicit sourceInfo: SourceInfo): (Option[LazyModule], () => Unit) = {
+    (None, () => { bi <> bo }) // no helper LazyModule; the deferred thunk bulk-connects the two bundles
+  }
+  // mixO/mixI prepend the traversed node to every master's/slave's nodePath.
+  override def mixO(pd: AXI4MasterPortParameters, node: OutwardNode[AXI4MasterPortParameters, AXI4SlavePortParameters, AXI4Bundle]): AXI4MasterPortParameters =
+    pd.copy(masters = pd.masters.map { c => c.copy (nodePath = node +: c.nodePath) })
+  override def mixI(pu: AXI4SlavePortParameters, node: InwardNode[AXI4MasterPortParameters, AXI4SlavePortParameters, AXI4Bundle]): AXI4SlavePortParameters =
+    pu.copy(slaves = pu.slaves.map { m => m.copy (nodePath = node +: m.nodePath) })
+}
+// Thin AXI4-typed wrappers around the generic diplomacy node classes.
+case class AXI4IdentityNode() extends IdentityNode(AXI4Imp)
+case class AXI4OutputNode() extends OutputNode(AXI4Imp)
+case class AXI4InputNode() extends InputNode(AXI4Imp)
+// Source (master) and sink (slave) nodes; numPorts defaults to exactly one port.
+case class AXI4MasterNode(portParams: AXI4MasterPortParameters, numPorts: Range.Inclusive = 1 to 1)
+  extends SourceNode(AXI4Imp)(portParams, numPorts)
+case class AXI4SlaveNode(portParams: AXI4SlavePortParameters, numPorts: Range.Inclusive = 1 to 1)
+  extends SinkNode(AXI4Imp)(portParams, numPorts)
+
+case class AXI4AdapterNode( // interior node: clientFn/managerFn rewrite the parameters passing through it
+  clientFn:       Seq[AXI4MasterPortParameters] => AXI4MasterPortParameters,
+  managerFn:      Seq[AXI4SlavePortParameters]  => AXI4SlavePortParameters,
+  numMasterPorts: Range.Inclusive = 1 to 1,
+  numSlavePorts:  Range.Inclusive = 1 to 1)
+  extends InteriorNode(AXI4Imp)(clientFn, managerFn, numMasterPorts, numSlavePorts)
diff --git a/src/main/scala/uncore/axi4/Parameters.scala b/src/main/scala/uncore/axi4/Parameters.scala
new file mode 100644
index 00000000..dacc80ee
--- /dev/null
+++ b/src/main/scala/uncore/axi4/Parameters.scala
@@ -0,0 +1,109 @@
+// See LICENSE for license details.
+
+package uncore.axi4
+
+import Chisel._
+import diplomacy._
+import scala.math.max
+// Describes one AXI4 slave device: its address sets and the transfer sizes it accepts. All checks run at construction.
+case class AXI4SlaveParameters(
+  address:       Seq[AddressSet],
+  regionType:    RegionType.T  = RegionType.GET_EFFECTS,
+  executable:    Boolean       = false, // processor can execute from this memory
+  nodePath:      Seq[BaseNode] = Seq(),
+  supportsWrite: TransferSizes = TransferSizes.none,
+  supportsRead:  TransferSizes = TransferSizes.none,
+  interleavedId: Option[Int]   = None) // The device will not interleave read responses
+{
+  require (!address.isEmpty, "AXI4SlaveParameters needs at least one AddressSet") // otherwise maxAddress fails with an opaque empty.max
+  address.foreach { a => require (a.finite, s"AXI4 slave address set $a must be finite") }
+  address.combinations(2).foreach { case Seq(x,y) => require (!x.overlaps(y), s"AXI4 slave address sets $x and $y overlap") }
+  val name = nodePath.lastOption.map(_.lazyModule.name).getOrElse("disconnected")
+  val maxTransfer = max(supportsWrite.max, supportsRead.max)
+  val maxAddress = address.map(_.max).max
+
+  // The device had better not support a transfer larger than its alignment
+  address.foreach { case a => require (a.alignment >= maxTransfer, s"AXI4 slave address set $a has alignment ${a.alignment}, smaller than the largest supported transfer $maxTransfer") }
+}
+// All slaves reachable through one port, plus the width of the data bus in bytes.
+case class AXI4SlavePortParameters(
+  slaves:    Seq[AXI4SlaveParameters],
+  beatBytes: Int)
+{
+  require (!slaves.isEmpty, "AXI4SlavePortParameters needs at least one slave")
+  require (isPow2(beatBytes), s"beatBytes must be a power of two (was $beatBytes)")
+
+  val maxTransfer = slaves.map(_.maxTransfer).max
+  val maxAddress = slaves.map(_.maxAddress).max
+
+  // Check the link is not pointlessly wide
+  require (maxTransfer >= beatBytes, s"link is $beatBytes bytes wide, but the largest transfer any slave supports is only $maxTransfer bytes")
+  // Check that the link can be implemented in AXI4
+  require (maxTransfer <= beatBytes * (1 << AXI4Parameters.lenBits), s"a $maxTransfer-byte transfer needs more than ${1 << AXI4Parameters.lenBits} beats of $beatBytes bytes")
+
+  // Require disjoint ranges for addresses
+  slaves.combinations(2).foreach { case Seq(x,y) =>
+    x.address.foreach { a => y.address.foreach { b =>
+      require (!a.overlaps(b), s"AXI4 slaves ${x.name} and ${y.name} overlap: $a vs $b")
+    } }
+  }
+}
+// One AXI4 master: the range of transaction ids it may use.
+case class AXI4MasterParameters(
+  id:       IdRange       = IdRange(0, 1),
+  nodePath: Seq[BaseNode] = Seq())
+{
+  val name = nodePath.lastOption.map(_.lazyModule.name).getOrElse("disconnected")
+}
+// All masters sharing one port; their id ranges must not overlap.
+case class AXI4MasterPortParameters(
+  masters: Seq[AXI4MasterParameters])
+{
+  require (!masters.isEmpty, "AXI4MasterPortParameters needs at least one master") // otherwise endId fails with an opaque empty.max
+  val endId = masters.map(_.id.end).max
+  // Require disjoint ranges for ids
+  masters.combinations(2).foreach { case Seq(x,y) => require (!x.id.overlaps(y.id), s"AXI4 masters ${x.name} and ${y.name} have overlapping id ranges") }
+}
+// The three negotiable widths of an AXI4 bundle; the remaining widths are the fixed values from AXI4Parameters.
+case class AXI4BundleParameters(
+  addrBits: Int,
+  dataBits: Int,
+  idBits:   Int)
+{
+  require (dataBits >= 8, s"AXI4 dataBits must be >= 8 (was $dataBits)")
+  require (addrBits >= 1, s"AXI4 addrBits must be >= 1 (was $addrBits)")
+  require (idBits >= 1, s"AXI4 idBits must be >= 1 (was $idBits)")
+  require (isPow2(dataBits), s"AXI4 dataBits must be a power of two (was $dataBits)")
+
+  // Bring the globals into scope
+  val lenBits   = AXI4Parameters.lenBits
+  val sizeBits  = AXI4Parameters.sizeBits
+  val burstBits = AXI4Parameters.burstBits
+  val lockBits  = AXI4Parameters.lockBits
+  val cacheBits = AXI4Parameters.cacheBits
+  val protBits  = AXI4Parameters.protBits
+  val qosBits   = AXI4Parameters.qosBits
+  val respBits  = AXI4Parameters.respBits
+  // Field-wise maximum: the narrowest bundle able to carry both this and x.
+  def union(x: AXI4BundleParameters) =
+    AXI4BundleParameters(
+      max(addrBits, x.addrBits),
+      max(dataBits, x.dataBits),
+      max(idBits,   x.idBits))
+}
+// Derives the bundle widths from the two ends of a link.
+object AXI4BundleParameters
+{
+  def apply(master: AXI4MasterPortParameters, slave: AXI4SlavePortParameters) =
+    new AXI4BundleParameters(
+      addrBits = log2Up(slave.maxAddress+1),
+      dataBits = slave.beatBytes * 8,
+      idBits   = log2Up(master.endId))
+}
+// A negotiated master/slave pair together with the bundle parameters derived from it.
+case class AXI4EdgeParameters(
+  master: AXI4MasterPortParameters,
+  slave:  AXI4SlavePortParameters)
+{
+  val bundle = AXI4BundleParameters(master, slave)
+}
diff --git a/src/main/scala/uncore/axi4/Protocol.scala b/src/main/scala/uncore/axi4/Protocol.scala
new file mode 100644
index 00000000..90403b46
--- /dev/null
+++ b/src/main/scala/uncore/axi4/Protocol.scala
@@ -0,0 +1,37 @@
+// See LICENSE for license details.
+
+package uncore.axi4
+
+import Chisel._
+import chisel3.util.{Irrevocable, IrrevocableIO}
+// Field widths and field encodings shared by every AXI4 bundle in this package.
+object AXI4Parameters
+{
+  // These are all fixed by the AXI4 standard:
+  val lenBits   = 8
+  val sizeBits  = 3
+  val burstBits = 2
+  val lockBits  = 1
+  val cacheBits = 4
+  val protBits  = 3
+  val qosBits   = 4
+  val respBits  = 2
+  // Individual flag bits of the 4-bit cache field
+  val CACHE_RALLOCATE  = UInt(8, width = cacheBits)
+  val CACHE_WALLOCATE  = UInt(4, width = cacheBits)
+  val CACHE_MODIFIABLE = UInt(2, width = cacheBits)
+  val CACHE_BUFFERABLE = UInt(1, width = cacheBits)
+  // Individual flag bits of the 3-bit prot field
+  val PROT_PRIVILEDGED = UInt(1, width = protBits) // (sic) spelling kept: the identifier is referenced by TLToAXI4
+  val PROT_INSECURE    = UInt(2, width = protBits)
+  val PROT_INSTRUCTION = UInt(4, width = protBits)
+  // Values of the burst field
+  val BURST_FIXED = UInt(0, width = burstBits)
+  val BURST_INCR  = UInt(1, width = burstBits)
+  val BURST_WRAP  = UInt(2, width = burstBits)
+  // Values of the resp field
+  val RESP_OKAY   = UInt(0, width = respBits)
+  val RESP_EXOKAY = UInt(1, width = respBits)
+  val RESP_SLVERR = UInt(2, width = respBits)
+  val RESP_DECERR = UInt(3, width = respBits)
+}
diff --git a/src/main/scala/uncore/axi4/RegisterRouter.scala b/src/main/scala/uncore/axi4/RegisterRouter.scala
new file mode 100644
index 00000000..b46d69f0
--- /dev/null
+++ b/src/main/scala/uncore/axi4/RegisterRouter.scala
@@ -0,0 +1,113 @@
+// See LICENSE for license details.
+
+package uncore.axi4
+
+import Chisel._
+import diplomacy._
+import regmapper._
+import scala.math.{min,max}
+// An AXI4 slave node over one contiguous address range whose accesses (1..beatBytes bytes) are served by a register map.
+class AXI4RegisterNode(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true)
+  extends AXI4SlaveNode(AXI4SlavePortParameters(
+    Seq(AXI4SlaveParameters(
+      address       = Seq(address),
+      supportsWrite = TransferSizes(1, beatBytes),
+      supportsRead  = TransferSizes(1, beatBytes),
+      interleavedId = Some(0))),
+    beatBytes  = beatBytes))
+{
+  require (address.contiguous)
+
+  // Calling this method causes the matching AXI4 bundle to be
+  // configured to route all requests to the listed RegFields.
+  def regmap(mapping: RegField.Map*) = {
+    val ar = bundleIn(0).ar
+    val aw = bundleIn(0).aw
+    val w  = bundleIn(0).w
+    val r  = bundleIn(0).r
+    val b  = bundleIn(0).b
+    // Index width covers (address.mask+1)/beatBytes words; the AXI id rides along as the mapper's extra field
+    val params = RegMapperParams(log2Up((address.mask+1)/beatBytes), beatBytes, ar.bits.params.idBits)
+    val in = Wire(Decoupled(new RegMapperInput(params)))
+
+    // Prefer to execute reads first
+    in.valid := ar.valid || (aw.valid && w.valid)
+    ar.ready := in.ready
+    aw.ready := in.ready && !ar.valid && w .valid // a write is taken only when no read is pending ...
+    w .ready := in.ready && !ar.valid && aw.valid // ... and AW and W are both valid, so they are consumed together
+    // Request fields come from AR when a read is pending, otherwise from AW/W
+    val addr = Mux(ar.valid, ar.bits.addr, aw.bits.addr)
+    val in_id = Mux(ar.valid, ar.bits.id, aw.bits.id)
+    val mask = uncore.tilelink2.maskGen(ar.bits.addr, ar.bits.size, beatBytes) // read mask, derived from AR address and size
+
+    in.bits.read  := ar.valid
+    in.bits.index := addr >> log2Ceil(beatBytes) // drop the byte-within-beat offset
+    in.bits.data  := w.bits.data
+    in.bits.mask  := Mux(ar.valid, mask, w.bits.strb) // writes use the strobes supplied on W
+    in.bits.extra := in_id
+
+    // Invoke the register map builder and make it Irrevocable
+    val out = Queue.irrevocable(
+      RegMapper(beatBytes, concurrency, undefZero, in, mapping:_*),
+      entries = 1, pipe = true, flow = true)
+
+    // No flow control needed
+    out.ready := Mux(out.bits.read, r.ready, b.ready) // a response drains through R if it was a read, through B otherwise
+    r.valid := out.valid &&  out.bits.read
+    b.valid := out.valid && !out.bits.read
+    // NOTE(review): AXI4BundleParameters requires idBits >= 1, so the == 0 branch below looks unreachable -- confirm
+    val out_id = if (r.bits.params.idBits == 0) UInt(0) else out.bits.extra
+
+    r.bits.id   := out_id
+    r.bits.data := out.bits.data
+    r.bits.last := Bool(true) // every read response is a single beat
+    r.bits.resp := AXI4Parameters.RESP_OKAY
+    b.bits.id   := out_id
+    b.bits.resp := AXI4Parameters.RESP_OKAY
+  }
+}
+// Factory so callers can write AXI4RegisterNode(...) without `new`.
+object AXI4RegisterNode
+{
+  def apply(address: AddressSet, concurrency: Int = 0, beatBytes: Int = 4, undefZero: Boolean = true) =
+    new AXI4RegisterNode(address, concurrency, beatBytes, undefZero)
+}
+
+// These convenience methods below combine to make it possible to create an AXI4
+// register mapped device from a totally abstract register mapped device.
+
+abstract class AXI4RegisterRouterBase(address: AddressSet, interrupts: Int, concurrency: Int, beatBytes: Int, undefZero: Boolean) extends LazyModule
+{
+  val node = AXI4RegisterNode(address, concurrency, beatBytes, undefZero) // AXI4 slave port served by the register map
+  val intnode = uncore.tilelink2.IntSourceNode(interrupts) // interrupt source node with `interrupts` lines
+}
+// The two hardware vectors handed to the user's bundle builder.
+case class AXI4RegBundleArg(interrupts: Vec[Vec[Bool]], in: Vec[AXI4Bundle])
+// Minimal IO bundle for a register-mapped device: it simply re-exposes the two vectors from arg.
+class AXI4RegBundleBase(arg: AXI4RegBundleArg) extends Bundle
+{
+  val interrupts = arg.interrupts
+  val in = arg.in
+}
+// Variant that additionally carries a user-defined parameter object of type P.
+class AXI4RegBundle[P](val params: P, arg: AXI4RegBundleArg) extends AXI4RegBundleBase(arg)
+// Module implementation base: io comes from bundleBuilder, and regmap is delegated to the router's node.
+class AXI4RegModule[P, B <: AXI4RegBundleBase](val params: P, bundleBuilder: => B, router: AXI4RegisterRouterBase)
+  extends LazyModuleImp(router) with HasRegMap
+{
+  val io = bundleBuilder
+  val interrupts = if (io.interrupts.isEmpty) Vec(0, Bool()) else io.interrupts(0) // empty Vec when there is no interrupt port
+  def regmap(mapping: RegField.Map*) = router.node.regmap(mapping:_*)
+}
+// Ties it together: a register router occupying [base, base+size) that builds its module from the two supplied builders.
+class AXI4RegisterRouter[B <: AXI4RegBundleBase, M <: LazyModuleImp]
+   (val base: BigInt, val interrupts: Int = 0, val size: BigInt = 4096, val concurrency: Int = 0, val beatBytes: Int = 4, undefZero: Boolean = true)
+   (bundleBuilder: AXI4RegBundleArg => B)
+   (moduleBuilder: (=> B, AXI4RegisterRouterBase) => M)
+  extends AXI4RegisterRouterBase(AddressSet(base, size-1), interrupts, concurrency, beatBytes, undefZero)
+{
+  require (isPow2(size))
+  // require (size >= 4096) ... not absolutely required, but highly recommended
+  // Built lazily; the builders receive the interrupt and AXI4 bundles of this router's nodes
+  lazy val module = moduleBuilder(bundleBuilder(AXI4RegBundleArg(intnode.bundleOut, node.bundleIn)), this)
+}
diff --git a/src/main/scala/uncore/axi4/SRAM.scala b/src/main/scala/uncore/axi4/SRAM.scala
new file mode 100644
index 00000000..c2014f94
--- /dev/null
+++ b/src/main/scala/uncore/axi4/SRAM.scala
@@ -0,0 +1,67 @@
+// See LICENSE for license details.
+
+package uncore.axi4
+
+import Chisel._
+import diplomacy._
+// A simple AXI4 slave RAM: one beat (1..beatBytes bytes) per request, backed by a SeqMem of byte vectors.
+class AXI4RAM(address: AddressSet, executable: Boolean = true, beatBytes: Int = 4) extends LazyModule
+{
+  val node = AXI4SlaveNode(AXI4SlavePortParameters(
+    Seq(AXI4SlaveParameters(
+      address       = List(address),
+      regionType    = RegionType.UNCACHED,
+      executable    = executable,
+      supportsRead  = TransferSizes(1, beatBytes),
+      supportsWrite = TransferSizes(1, beatBytes),
+      interleavedId = Some(0))),
+    beatBytes  = beatBytes))
+
+  // We require the address range to include an entire beat (for the write mask)
+  require ((address.mask & (beatBytes-1)) == beatBytes-1)
+
+  lazy val module = new LazyModuleImp(this) {
+    val io = new Bundle {
+      val in = node.bundleIn
+    }
+    // bigBits lists the bits of x, least-significant first, stopping after the highest set bit
+    def bigBits(x: BigInt, tail: List[Boolean] = List.empty[Boolean]): List[Boolean] =
+      if (x == 0) tail.reverse else bigBits(x >> 1, ((x & 1) == 1) :: tail)
+    val mask = bigBits(address.mask >> log2Ceil(beatBytes)) // address-mask bits above the byte-in-beat offset
+
+    val in = io.in(0)
+    val mem = SeqMem(1 << mask.filter(b=>b).size, Vec(beatBytes, Bits(width = 8))) // one word per combination of set mask bits
+    // Keep only the address bits selected by the mask and pack them (MSB first) into the memory index
+    val r_addr = Cat((mask zip (in.ar.bits.addr >> log2Ceil(beatBytes)).toBools).filter(_._1).map(_._2).reverse)
+    val w_addr = Cat((mask zip (in.aw.bits.addr >> log2Ceil(beatBytes)).toBools).filter(_._1).map(_._2).reverse)
+    // Write path: AW, W and B all fire in the same cycle
+    in.aw.ready := in. w.valid && in.b.ready
+    in. w.ready := in.aw.valid && in.b.ready
+    in. b.valid := in.w.valid && in.aw.valid
+
+    in.b.bits.id   := in.aw.bits.id
+    in.b.bits.resp := AXI4Parameters.RESP_OKAY
+    val wdata = Vec.tabulate(beatBytes) { i => in.w.bits.data(8*(i+1)-1, 8*i) } // split the beat into bytes, byte 0 first
+    when (in.b.fire()) {
+      mem.write(w_addr, wdata, in.w.bits.strb.toBools) // strb acts as the per-byte write enable
+    }
+    // Read path: r_full records that an accepted read has not yet been delivered on R
+    val r_full = RegInit(Bool(false))
+    val r_id = Reg(UInt())
+
+    when (in. r.fire()) { r_full := Bool(false) }
+    when (in.ar.fire()) { r_full := Bool(true) } // stated last, so it wins when R and AR fire together
+
+    in. r.valid := r_full
+    in.ar.ready := in.r.ready || !r_full // accept a new read when idle or while the current response is being taken
+
+    when (in.ar.fire()) {
+      r_id := in.ar.bits.id
+    }
+    // NOTE(review): mem.read is enabled only on ar.fire(); if R stalls, data validity relies on the SeqMem output holding -- confirm
+    in.r.bits.id   := r_id
+    in.r.bits.resp := AXI4Parameters.RESP_OKAY
+    in.r.bits.data := Cat(mem.read(r_addr, in.ar.fire()).reverse)
+    in.r.bits.last := Bool(true)
+  }
+}
diff --git a/src/main/scala/uncore/axi4/package.scala b/src/main/scala/uncore/axi4/package.scala
new file mode 100644
index 00000000..4ea7c9ab
--- /dev/null
+++ b/src/main/scala/uncore/axi4/package.scala
@@ -0,0 +1,9 @@
+package uncore
+
+import Chisel._
+import diplomacy._
+// Package-level alias for the outward-facing side of any AXI4 node.
+package object axi4
+{
+  type AXI4OutwardNode = OutwardNode[AXI4MasterPortParameters, AXI4SlavePortParameters, AXI4Bundle]
+}
diff --git a/src/main/scala/uncore/tilelink2/Edges.scala b/src/main/scala/uncore/tilelink2/Edges.scala
index 4e9c3284..aac8338c 100644
--- a/src/main/scala/uncore/tilelink2/Edges.scala
+++ b/src/main/scala/uncore/tilelink2/Edges.scala
@@ -25,28 +25,8 @@ class TLEdge(
     }
   }
 
-  // This gets used everywhere, so make the smallest circuit possible ...
-  def mask(addr_lo: UInt, lgSize: UInt): UInt = {
-    val lgBytes = log2Ceil(manager.beatBytes)
-    val sizeOH = UIntToOH(lgSize, log2Up(manager.beatBytes))
-    def helper(i: Int): Seq[(Bool, Bool)] = {
-      if (i == 0) {
-        Seq((lgSize >= UInt(lgBytes), Bool(true)))
-      } else {
-        val sub = helper(i-1)
-        val size = sizeOH(lgBytes - i)
-        val bit = addr_lo(lgBytes - i)
-        val nbit = !bit
-        Seq.tabulate (1 << i) { j =>
-          val (sub_acc, sub_eq) = sub(j/2)
-          val eq = sub_eq && (if (j % 2 == 1) bit else nbit)
-          val acc = sub_acc || (size && eq)
-          (acc, eq)
-        }
-      }
-    }
-    Cat(helper(lgBytes).map(_._1).reverse)
-  }
+  def mask(addr_lo: UInt, lgSize: UInt): UInt =
+    maskGen(addr_lo, lgSize, manager.beatBytes) // shared implementation in the tilelink2 package object
 
   // !!! 
make sure to align addr_lo for PutPartials with 0 masks
   def addr_lo(mask: UInt, lgSize: UInt): UInt = {
diff --git a/src/main/scala/uncore/tilelink2/ToAXI4.scala b/src/main/scala/uncore/tilelink2/ToAXI4.scala
new file mode 100644
index 00000000..f68d22a6
--- /dev/null
+++ b/src/main/scala/uncore/tilelink2/ToAXI4.scala
@@ -0,0 +1,241 @@
+// See LICENSE for license details.
+
+package uncore.tilelink2
+
+import Chisel._
+import chisel3.internal.sourceinfo.SourceInfo
+import diplomacy._
+import util.PositionalMultiQueue
+import uncore.axi4._
+import scala.math.{min, max}
+// Diplomacy node with a TileLink port on the inward side and an AXI4 port on the outward side (exactly one of each).
+case class TLToAXI4Node(idBits: Int) extends MixedNode(TLImp, AXI4Imp)(
+  dFn = { case (1, _) =>
+    // We must erase all client information, because we crush their source Ids
+    Seq(AXI4MasterPortParameters(Seq(AXI4MasterParameters(id = IdRange(0, 1 << idBits))))) // a single master using ids [0, 2^idBits)
+  },
+  uFn = { case (1, Seq(AXI4SlavePortParameters(slaves, beatBytes))) =>
+    val managers = slaves.zipWithIndex.map { case (s, id) => // each AXI4 slave becomes one TL manager with its own sink id
+      TLManagerParameters(
+        address            = s.address,
+        sinkId             = IdRange(id, id+1),
+        regionType         = s.regionType,
+        executable         = s.executable,
+        nodePath           = s.nodePath,
+        supportsGet        = s.supportsRead,
+        supportsPutFull    = s.supportsWrite,
+        supportsPutPartial = s.supportsWrite)
+        // AXI4 is NEVER fifo in TL sense (R+W are independent)
+    }
+    Seq(TLManagerPortParameters(managers, beatBytes, 0))
+  },
+  numPO = 1 to 1,
+  numPI = 1 to 1)
+// Converts TileLink A-channel requests into AXI4 AR / AW+W requests and merges AXI4 R / B responses back onto D.
+class TLToAXI4(idBits: Int, combinational: Boolean = true) extends LazyModule
+{
+  val node = TLToAXI4Node(idBits)
+
+  lazy val module = new LazyModuleImp(this) {
+    val io = new Bundle {
+      val in = node.bundleIn
+      val out = node.bundleOut
+    }
+
+    val in = io.in(0)
+    val out = io.out(0)
+
+    val edgeIn = node.edgesIn(0)
+    val edgeOut = node.edgesOut(0)
+    val slaves = edgeOut.slave.slaves
+
+    // All pairs of slaves must promise that they will never interleave data
+    require (slaves(0).interleavedId.isDefined)
+    slaves.foreach { s => require (s.interleavedId == slaves(0).interleavedId) }
+
+    // We need to keep the following state from A => D: (addr_lo, size, sink, source)
+    // All of those fields could potentially require 0 bits (argh. Chisel.)
+    // We will pack as many of the lowest bits of state as fit into the AXI ID.
+    // Any bits left-over must be put into a bank of Queues.
+    // The Queues are indexed by as many of the source bits as fit into the AXI ID.
+    // The Queues are deep enough that every source has guaranteed space in its Queue.
+
+    val sourceBits = log2Ceil(edgeIn.client.endSourceId)
+    val sinkBits = log2Ceil(edgeIn.manager.endSinkId)
+    val sizeBits = log2Ceil(edgeIn.maxLgSize+1)
+    val addrBits = log2Ceil(edgeIn.manager.beatBytes)
+    val stateBits = addrBits + sizeBits + sinkBits + sourceBits // could be 0
+    // Fields of the A-channel request currently being presented
+    val a_address = edgeIn.address(in.a.bits)
+    val a_addr_lo = edgeIn.addr_lo(a_address)
+    val a_source = in.a.bits.source
+    val a_sink = edgeIn.manager.findIdStartFast(a_address)
+    val a_size = edgeIn.size(in.a.bits)
+    val a_isPut = edgeIn.hasData(in.a.bits)
+    // Beat tracking: a_counter is 0 on the first beat, then counts the remaining beats down
+    val a_counter = RegInit(UInt(0, width = log2Up(edgeIn.maxTransfer)))
+    val a_beats1 = edgeIn.numBeats1(in.a.bits)
+    val a_first = a_counter === UInt(0)
+    val a_last = a_counter === UInt(1) || a_beats1 === UInt(0)
+    when (in.a.fire()) {
+      a_counter := Mux(a_first, a_beats1, a_counter - UInt(1))
+    }
+
+    // Make sure the fields are within the bounds we assumed
+    assert (a_source < UInt(1 << sourceBits))
+    assert (a_sink < UInt(1 << sinkBits))
+    assert (a_size < UInt(1 << sizeBits))
+    assert (a_addr_lo < UInt(1 << addrBits))
+
+    // Carefully pack/unpack fields into the state we send
+    val baseEnd = 0
+    val (sourceEnd, sourceOff) = (sourceBits + baseEnd, baseEnd)
+    val (sinkEnd, sinkOff) = (sinkBits + sourceEnd, sourceEnd)
+    val (sizeEnd, sizeOff) = (sizeBits + sinkEnd, sinkEnd)
+    val (addrEnd, addrOff) = (addrBits + sizeEnd, sizeEnd)
+    require (addrEnd == stateBits)
+    // State layout, LSB to MSB: source | sink | size | addr_lo
+    val a_state = (a_source << sourceOff) | (a_sink << sinkOff) |
+                  (a_size << sizeOff) | (a_addr_lo << addrOff)
+    val a_id = if (idBits == 0) UInt(0) else a_state
+    // State recovered for the R response; the guards avoid zero-width bit extracts
+    val r_state = Wire(UInt(width = stateBits))
+    val r_source = if (sourceBits > 0) r_state(sourceEnd-1, sourceOff) else UInt(0)
+    val r_sink = if (sinkBits > 0) r_state(sinkEnd -1, sinkOff) else UInt(0)
+    val r_size = if (sizeBits > 0) r_state(sizeEnd -1, sizeOff) else UInt(0)
+    val r_addr_lo = if (addrBits > 0) r_state(addrEnd -1, addrOff) else UInt(0)
+    // Same unpacking for the B response
+    val b_state = Wire(UInt(width = stateBits))
+    val b_source = if (sourceBits > 0) b_state(sourceEnd-1, sourceOff) else UInt(0)
+    val b_sink = if (sinkBits > 0) b_state(sinkEnd -1, sinkOff) else UInt(0)
+    val b_size = if (sizeBits > 0) b_state(sizeEnd -1, sizeOff) else UInt(0)
+    val b_addr_lo = if (addrBits > 0) b_state(addrEnd -1, addrOff) else UInt(0)
+
+    val r_last = out.r.bits.last
+    val r_id = out.r.bits.id
+    val b_id = out.b.bits.id
+
+    if (stateBits <= idBits) { // No need for any state tracking
+      r_state := r_id
+      b_state := b_id
+    } else {
+      val bankIndexBits = min(sourceBits, idBits) // low source bits that fit in the AXI id; they select the bank
+      val posBits = max(0, sourceBits - idBits) // remaining source bits; they become the position inside a bank
+      val implicitBits = max(idBits, sourceBits) // low state bits that never need to be stored as queue data
+      val bankBits = stateBits - implicitBits // state bits that must be stored as queue data
+      val numBanks = min(1 << bankIndexBits, edgeIn.client.endSourceId)
+      def bankEntries(i: Int) = (edgeIn.client.endSourceId+numBanks-i-1) / numBanks
+
+      val banks = Seq.tabulate(numBanks) { i =>
+        // We know there can only be as many outstanding requests as TL sources
+        // However, AXI read and write queues are not mutually FIFO.
+        // Therefore, we want to pop them individually, but share the storage.
+        PositionalMultiQueue(UInt(width=max(1,bankBits)), positions=bankEntries(i), ways=2, combinational=combinational)
+      }
+
+      val a_bankPosition = if (posBits == 0) UInt(0) else a_source(sourceBits-1, idBits)
+      val a_bankIndex = if (bankIndexBits == 0) UInt(0) else a_source(bankIndexBits-1, 0)
+      val r_bankIndex = if (bankIndexBits == 0) UInt(0) else r_id(bankIndexBits-1, 0)
+      val b_bankIndex = if (bankIndexBits == 0) UInt(0) else b_id(bankIndexBits-1, 0)
+      val a_bankSelect = UIntToOH(a_bankIndex, numBanks)
+      val r_bankSelect = UIntToOH(r_bankIndex, numBanks)
+      val b_bankSelect = UIntToOH(b_bankIndex, numBanks)
+
+      banks.zipWithIndex.foreach { case (q, i) =>
+        // Push a_state into the banks
+        q.io.enq.valid := in.a.fire() && a_last && a_bankSelect(i) // one entry per request, pushed on its last beat
+        q.io.enq.bits.pos := a_bankPosition
+        q.io.enq.bits.data := a_state >> implicitBits
+        q.io.enq.bits.way := Mux(a_isPut, UInt(0), UInt(1)) // way 0 = writes, way 1 = reads
+        // Pop the bank's ways
+        q.io.deq(0).ready := out.b.fire() && b_bankSelect(i)
+        q.io.deq(1).ready := out.r.fire() && r_bankSelect(i) && r_last // reads pop only on the final R beat
+        // The FIFOs must be valid when we're ready to pop them...
+        assert (q.io.deq(0).valid || !q.io.deq(0).ready)
+        assert (q.io.deq(1).valid || !q.io.deq(1).ready)
+      }
+      // Select the head entry of the bank addressed by the response id (way 0 for B, way 1 for R)
+      val b_bankData = Vec(banks.map(_.io.deq(0).bits.data))(b_bankIndex)
+      val b_bankPos = Vec(banks.map(_.io.deq(0).bits.pos ))(b_bankIndex)
+      val r_bankData = Vec(banks.map(_.io.deq(1).bits.data))(r_bankIndex)
+      val r_bankPos = Vec(banks.map(_.io.deq(1).bits.pos ))(r_bankIndex)
+      // optCat concatenates only the pieces whose flag is true, so zero-width fields are skipped
+      def optCat(x: (Boolean, UInt)*) = { Cat(x.toList.filter(_._1).map(_._2)) }
+      b_state := optCat((bankBits > 0, b_bankData), (posBits > 0, b_bankPos), (idBits > 0, b_id))
+      r_state := optCat((bankBits > 0, r_bankData), (posBits > 0, r_bankPos), (idBits > 0, r_id))
+    }
+
+    // We need these Queues because AXI4 queues are irrevocable
+    val depth = if (combinational) 1 else 2
+    val out_arw = Wire(Decoupled(new AXI4BundleARW(out.params)))
+    val out_w = Wire(out.w)
+    out.w <> Queue.irrevocable(out_w, entries=depth, pipe=combinational, flow=combinational)
+    val queue_arw = Queue.irrevocable(out_arw, entries=depth, pipe=combinational, flow=combinational)
+
+    // Fan out the ARW channel to AR and AW
+    out.ar.bits := queue_arw.bits
+    out.aw.bits := queue_arw.bits
+    out.ar.valid := queue_arw.valid && !queue_arw.bits.wen
+    out.aw.valid := queue_arw.valid && queue_arw.bits.wen
+    queue_arw.ready := Mux(queue_arw.bits.wen, out.aw.ready, out.ar.ready)
+
+    val beatBytes = edgeIn.manager.beatBytes
+    val maxSize = UInt(log2Ceil(beatBytes))
+    val doneAW = RegInit(Bool(false)) // set between the first and last beat of a multi-beat request: its address is already sent
+    when (in.a.fire()) { doneAW := !a_last }
+    // Address-channel request derived from the current A beat
+    val arw = out_arw.bits
+    arw.wen := a_isPut
+    arw.id := a_id // truncated
+    arw.addr := a_address
+    arw.len := UIntToOH1(a_size, AXI4Parameters.lenBits + log2Ceil(beatBytes)) >> log2Ceil(beatBytes) // presumably beats-1, assuming UIntToOH1 yields 2^a_size - 1 -- confirm
+    arw.size := Mux(a_size >= maxSize, maxSize, a_size) // a beat never carries more than beatBytes
+    arw.burst := AXI4Parameters.BURST_INCR
+    arw.lock := UInt(0) // not exclusive (LR/SC unsupported b/c no forward progress guarantee)
+    arw.cache := UInt(0) // do not allow AXI to modify our transactions
+    arw.prot := AXI4Parameters.PROT_PRIVILEDGED
+    arw.qos := UInt(0) // no QoS
+    // Puts need W on every beat and AW only until doneAW is set; Gets need only the address channel
+    in.a.ready := Mux(a_isPut, (doneAW || out_arw.ready) && out_w.ready, out_arw.ready)
+    out_arw.valid := in.a.valid && Mux(a_isPut, !doneAW && out_w.ready, Bool(true))
+
+    out_w.valid := in.a.valid && a_isPut && (doneAW || out_arw.ready)
+    out_w.bits.data := in.a.bits.data
+    out_w.bits.strb := in.a.bits.mask
+    out_w.bits.last := a_last
+
+    // R and B => D arbitration
+    val r_holds_d = RegInit(Bool(false)) // set while a multi-beat R burst is in progress, so B cannot cut in
+    when (out.r.fire()) { r_holds_d := !out.r.bits.last }
+    // Give R higher priority than B
+    val r_wins = out.r.valid || r_holds_d
+
+    out.r.ready := in.d.ready
+    out.b.ready := in.d.ready && !r_wins
+    in.d.valid := Mux(r_wins, out.r.valid, out.b.valid)
+    // Any response code other than RESP_OKAY is reported as an error
+    val r_error = out.r.bits.resp =/= AXI4Parameters.RESP_OKAY
+    val b_error = out.b.bits.resp =/= AXI4Parameters.RESP_OKAY
+    // r_d is the data-carrying response; its data field is overwritten from out.r below
+    val r_d = edgeIn.AccessAck(r_addr_lo, r_sink, r_source, r_size, UInt(0), r_error)
+    val b_d = edgeIn.AccessAck(b_addr_lo, b_sink, b_source, b_size, b_error)
+
+    in.d.bits := Mux(r_wins, r_d, b_d)
+    in.d.bits.data := out.r.bits.data // avoid a costly Mux
+
+    // Tie off unused channels
+    in.b.valid := Bool(false)
+    in.c.ready := Bool(true)
+    in.e.ready := Bool(true)
+  }
+}
+
+object TLToAXI4
+{
+  // applied to the TL source node; y.node := TLToAXI4(idBits)(x.node)
+  def apply(idBits: Int, combinational: Boolean = true)(x: TLOutwardNode)(implicit sourceInfo: SourceInfo): AXI4OutwardNode = {
+    val axi4 = LazyModule(new TLToAXI4(idBits, combinational))
+    axi4.node := x
+    axi4.node
+  }
+}
diff --git a/src/main/scala/uncore/tilelink2/package.scala b/src/main/scala/uncore/tilelink2/package.scala
index afcb77a4..a0490d7e 100644
--- a/src/main/scala/uncore/tilelink2/package.scala
+++ b/src/main/scala/uncore/tilelink2/package.scala
@@ -17,4 +17,26 @@ package object tilelink2
       if (s >= w) x else helper(s+s, x | (x << s)(w-1,0))
     helper(1, x)
   }
+  // This gets used everywhere, so make the smallest circuit possible ... 
+  def maskGen(addr_lo: UInt, lgSize: UInt, beatBytes: Int): UInt = { // one result bit per byte lane of a beatBytes-wide beat
+    val lgBytes = log2Ceil(beatBytes)
+    val sizeOH = UIntToOH(lgSize, log2Up(beatBytes))
+    def helper(i: Int): Seq[(Bool, Bool)] = { // level i yields 2^i pairs of (lane group is accessed, address selects this group)
+      if (i == 0) {
+        Seq((lgSize >= UInt(lgBytes), Bool(true))) // the whole beat is accessed when the size is at least lgBytes
+      } else {
+        val sub = helper(i-1)
+        val size = sizeOH(lgBytes - i)
+        val bit = addr_lo(lgBytes - i)
+        val nbit = !bit
+        Seq.tabulate (1 << i) { j =>
+          val (sub_acc, sub_eq) = sub(j/2) // the parent group covering this half
+          val eq = sub_eq && (if (j % 2 == 1) bit else nbit)
+          val acc = sub_acc || (size && eq)
+          (acc, eq)
+        }
+      }
+    }
+    Cat(helper(lgBytes).map(_._1).reverse) // reversed so that entry 0 ends up in bit 0
+  }
 }
diff --git a/src/main/scala/util/PositionalMultiQueue.scala b/src/main/scala/util/PositionalMultiQueue.scala
new file mode 100644
index 00000000..cd87ebdc
--- /dev/null
+++ b/src/main/scala/util/PositionalMultiQueue.scala
@@ -0,0 +1,93 @@
+// See LICENSE for license details.
+
+package util
+import Chisel._
+// Static configuration: payload type, number of storage positions, number of dequeue ports (ways).
+case class PositionalMultiQueueParameters[T <: Data](gen: T, positions: Int, ways: Int)
+// What a dequeue port delivers: the stored payload and the position it occupied.
+class PositionalMultiQueueEntry[T <: Data](params: PositionalMultiQueueParameters[T])
+  extends GenericParameterizedBundle(params)
+{
+  val data = params.gen.asOutput
+  val pos = UInt(width = log2Up(params.positions))
+}
+// What is pushed: an entry plus the way (dequeue port) that will later pop it.
+class PositionalMultiQueuePush[T <: Data](params: PositionalMultiQueueParameters[T])
+  extends PositionalMultiQueueEntry(params)
+{
+  val way = UInt(width = log2Up(params.ways))
+}
+
+/* A PositionalMultiQueue is like a normal Queue, except that it stores (position, value).
+ * When you pop it, you get back the oldest (position, value) pushed into it.
+ * >>>>> You must guarantee that you never enqueue to an occupied position. <<<<<
+ * Unlike a normal Queue, a PositionalMultiQueue has multiple dequeue ports (ways).
+ * You select which way will dequeue a given (position, value) when you enqueue it.
+ * If combinational, dequeue ports become valid on the same cycle as the enqueue. 
+ */
+class PositionalMultiQueue[T <: Data](params: PositionalMultiQueueParameters[T], combinational: Boolean) extends Module
+{
+  val io = new Bundle {
+    val enq = Valid(new PositionalMultiQueuePush(params)).flip
+    val deq = Vec(params.ways, Decoupled(new PositionalMultiQueueEntry(params)))
+  }
+  // Each way is a linked list threaded through the shared position-indexed storage
+  val empty = RegInit(Vec.fill(params.ways) { Bool(true) })
+  val head  = Reg(Vec(params.ways, UInt(width = log2Up(params.positions))))
+  val tail  = Reg(Vec(params.ways, UInt(width = log2Up(params.positions))))
+  val next  = Reg(Vec(params.positions, UInt(width = log2Up(params.positions))))
+  val data  = Reg(Vec(params.positions, params.gen))
+  // optimized away for synthesis; used to confirm invariant
+  val guard = RegInit(Vec.fill(params.positions) { Bool(false) })
+
+  when (io.enq.fire()) {
+    data(io.enq.bits.pos) := io.enq.bits.data
+    // ensure the user never stores to the same position twice
+    assert (!guard(io.enq.bits.pos))
+    guard(io.enq.bits.pos) := Bool(true)
+  }
+
+  val deq = Wire(io.deq)
+  io.deq <> deq
+
+  val waySelect = UIntToOH(io.enq.bits.way, params.ways)
+  for (i <- 0 until params.ways) {
+    val enq = io.enq.fire() && waySelect(i) // this way receives the pushed entry
+    val last = head(i) === tail(i) // the head is the only entry left in this way
+
+    when (enq) {
+      tail(i) := io.enq.bits.pos
+      when (empty(i)) {
+        head(i) := io.enq.bits.pos
+      } .otherwise {
+        next(tail(i)) := io.enq.bits.pos // link the old tail to the new entry
+      }
+    }
+
+    if (combinational) {
+      deq(i).valid := !empty(i) || enq
+      deq(i).bits.pos := Mux(empty(i), io.enq.bits.pos, head(i)) // an empty way passes the incoming entry straight through
+      deq(i).bits.data := Mux(empty(i), io.enq.bits.data, data(head(i)))
+    } else {
+      deq(i).valid := !empty(i)
+      deq(i).bits.pos := head(i)
+      deq(i).bits.data := data(head(i))
+    }
+
+    when (deq(i).fire()) {
+      head(i) := Mux(last, io.enq.bits.pos, next(head(i))) // if the list drains, a simultaneous push becomes the new head
+      guard(deq(i).bits.pos) := Bool(false) // stated after the enqueue block, so a same-cycle pass-through leaves the guard clear
+    }
+
+    when (enq =/= deq(i).fire()) { // occupancy changes only when exactly one of push/pop happens
+      empty(i) := deq(i).fire() && last
+    }
+  }
+}
+// Factory that wraps construction in Module(...); defaults to one way and combinational dequeue.
+object PositionalMultiQueue
+{
+  def apply[T <: Data](gen: T, positions: Int, ways: Int = 1, combinational: Boolean = true) = {
+    Module(new PositionalMultiQueue(PositionalMultiQueueParameters(gen, positions, ways), combinational))
+  }
+}