From 3db375ef43ab57f145162a81f4965cacf7a765ad Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Thu, 22 Mar 2018 14:29:36 -0700 Subject: [PATCH] devices: add support for the chiplink protocol --- src/main/scala/devices/chiplink/Bundles.scala | 93 ++++++++ src/main/scala/devices/chiplink/CAM.scala | 37 ++++ .../scala/devices/chiplink/ChipLink.scala | 207 ++++++++++++++++++ .../scala/devices/chiplink/Parameters.scala | 137 ++++++++++++ src/main/scala/devices/chiplink/Partial.scala | 106 +++++++++ src/main/scala/devices/chiplink/RX.scala | 90 ++++++++ src/main/scala/devices/chiplink/SinkA.scala | 65 ++++++ src/main/scala/devices/chiplink/SinkB.scala | 62 ++++++ src/main/scala/devices/chiplink/SinkC.scala | 63 ++++++ src/main/scala/devices/chiplink/SinkD.scala | 60 +++++ src/main/scala/devices/chiplink/SinkE.scala | 33 +++ src/main/scala/devices/chiplink/SourceA.scala | 104 +++++++++ src/main/scala/devices/chiplink/SourceB.scala | 68 ++++++ src/main/scala/devices/chiplink/SourceC.scala | 87 ++++++++ src/main/scala/devices/chiplink/SourceD.scala | 82 +++++++ src/main/scala/devices/chiplink/SourceE.scala | 21 ++ src/main/scala/devices/chiplink/TX.scala | 99 +++++++++ 17 files changed, 1414 insertions(+) create mode 100644 src/main/scala/devices/chiplink/Bundles.scala create mode 100644 src/main/scala/devices/chiplink/CAM.scala create mode 100644 src/main/scala/devices/chiplink/ChipLink.scala create mode 100644 src/main/scala/devices/chiplink/Parameters.scala create mode 100644 src/main/scala/devices/chiplink/Partial.scala create mode 100644 src/main/scala/devices/chiplink/RX.scala create mode 100644 src/main/scala/devices/chiplink/SinkA.scala create mode 100644 src/main/scala/devices/chiplink/SinkB.scala create mode 100644 src/main/scala/devices/chiplink/SinkC.scala create mode 100644 src/main/scala/devices/chiplink/SinkD.scala create mode 100644 src/main/scala/devices/chiplink/SinkE.scala create mode 100644 src/main/scala/devices/chiplink/SourceA.scala create 
mode 100644 src/main/scala/devices/chiplink/SourceB.scala
 create mode 100644 src/main/scala/devices/chiplink/SourceC.scala
 create mode 100644 src/main/scala/devices/chiplink/SourceD.scala
 create mode 100644 src/main/scala/devices/chiplink/SourceE.scala
 create mode 100644 src/main/scala/devices/chiplink/TX.scala
diff --git a/src/main/scala/devices/chiplink/Bundles.scala b/src/main/scala/devices/chiplink/Bundles.scala
new file mode 100644
index 0000000..01f172f
--- /dev/null
+++ b/src/main/scala/devices/chiplink/Bundles.scala
@@ -0,0 +1,93 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.util.{rightOR,GenericParameterizedBundle}
+
+// One direction of the wide data port: clock, reset, a send flag and
+// params.dataBits of data. Every field is declared OUTPUT, i.e. the bundle
+// is written from the point of view of the side that drives the lane.
+class WideDataLayerPortLane(params: ChipLinkParams) extends GenericParameterizedBundle(params) {
+  val clk = Clock(OUTPUT)
+  val rst = Bool(OUTPUT)
+  val send = Bool(OUTPUT)
+  val data = UInt(OUTPUT, width=params.dataBits)
+}
+
+// The full wide data port: c2b is driven by the owner of this bundle and
+// b2c is the same lane flipped (driven by the other side).
+class WideDataLayerPort(params: ChipLinkParams) extends GenericParameterizedBundle(params) {
+  val c2b = new WideDataLayerPortLane(params)
+  val b2c = new WideDataLayerPortLane(params).flip
+}
+
+// One beat of a serialized message: the data word, a flag marking the final
+// beat, and a beat count that is params.xferBits + 1 bits wide.
+class DataLayer(params: ChipLinkParams) extends GenericParameterizedBundle(params) {
+  val data = UInt(OUTPUT, width=params.dataBits)
+  val last = Bool(OUTPUT)
+  val beats = UInt(OUTPUT, width=params.xferBits + 1)
+}
+
+// Five credit counters, one per channel a..e, each params.creditBits wide.
+class CreditBump(params: ChipLinkParams) extends GenericParameterizedBundle(params) {
+  val a = UInt(OUTPUT, width = params.creditBits)
+  val b = UInt(OUTPUT, width = params.creditBits)
+  val c = UInt(OUTPUT, width = params.creditBits)
+  val d = UInt(OUTPUT, width = params.creditBits)
+  val e = UInt(OUTPUT, width = params.creditBits)
+  // All five counters in channel order, for element-wise operations.
+  def X: Seq[UInt] = Seq(a, b, c, d, e)
+
+  // saturating addition
+  // Each channel is added with a one-bit-wider sum (+&); if the extra bit is
+  // set the result is clamped to all ones instead of wrapping.
+  def +(that: CreditBump): CreditBump = {
+    val out = Wire(new CreditBump(params))
+    (out.X zip (X zip that.X)) foreach { case (o, (x, y)) =>
+      val z = x +& y
+      o := Mux((z >> params.creditBits).orR, ~UInt(0, width=params.creditBits), z)
+    }
+    out
+  }
+
+  // Send the MSB of the credits
+  // Returns (header, rest): a 32-bit header word carrying one 5-bit code per
+  // channel, and a CreditBump holding whatever was not encoded in the header.
+  def toHeader: (UInt, CreditBump) = {
+    // Splits x into (5-bit code for its highest set bit, x masked by `mask`).
+    // NOTE(review): assumes rightOR(x) sets every bit at or below the highest
+    // set bit of x, so that `mask` covers exactly the bits below it -- confirm
+    // against freechips.rocketchip.util.rightOR.
+    def msb(x: UInt) = {
+      val mask = rightOR(x) >> 1
+      val msbOH = ~(~x | mask)
+      // Code n stands for the value 2^(n-1), matching the decoder in
+      // object CreditBump: 0 = 0, 1 = 1, 2 = 2, 3 = 4, 4 = 8, ...
+      val msb = OHToUInt(msbOH << 1, params.creditBits + 1)
+      // OR with a 5-bit zero to widen, then take exactly 5 bits.
+      val pad = (msb | UInt(0, width=5))(4,0)
+      (pad, x & mask)
+    }
+    val (a_msb, a_rest) = msb(a)
+    val (b_msb, b_rest) = msb(b)
+    val (c_msb, c_rest) = msb(c)
+    val (d_msb, d_rest) = msb(d)
+    val (e_msb, e_rest) = msb(e)
+    // Bit layout, LSB first: [2:0] = 5 (format), [6:3] = padding,
+    // [11:7] = a, [16:12] = b, [21:17] = c, [26:22] = d, [31:27] = e.
+    val header = Cat(
+      e_msb, d_msb, c_msb, b_msb, a_msb,
+      UInt(0, width = 4), // padding
+      UInt(5, width = 3))
+
+    val out = Wire(new CreditBump(params))
+    out.a := a_rest
+    out.b := b_rest
+    out.c := c_rest
+    out.d := d_rest
+    out.e := e_rest
+    (header, out)
+  }
+}
+
+object CreditBump {
+  // Builds a CreditBump whose five channels all hold the constant x.
+  def apply(params: ChipLinkParams, x: Int): CreditBump = {
+    val v = UInt(x, width = params.creditBits)
+    val out = Wire(new CreditBump(params))
+    out.X.foreach { _ := v }
+    out
+  }
+
+  // Decodes a header word produced by CreditBump.toHeader back into credits.
+  def apply(params: ChipLinkParams, header: UInt): CreditBump = {
+    // Code 0 decodes to 0 and code n to 2^(n-1); any code larger than
+    // params.creditBits decodes to the saturated (all ones) value.
+    def convert(x: UInt) =
+      Mux(x > UInt(params.creditBits),
+          ~UInt(0, width = params.creditBits),
+          UIntToOH(x, params.creditBits + 1) >> 1)
+    val out = Wire(new CreditBump(params))
+    out.a := convert(header(11, 7))
+    out.b := convert(header(16, 12))
+    out.c := convert(header(21, 17))
+    out.d := convert(header(26, 22))
+    out.e := convert(header(31, 27))
+    out
+  }
+}
diff --git a/src/main/scala/devices/chiplink/CAM.scala b/src/main/scala/devices/chiplink/CAM.scala
new file mode 100644
index 0000000..97f2b71
--- /dev/null
+++ b/src/main/scala/devices/chiplink/CAM.scala
@@ -0,0 +1,37 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util._
+
+// A pool of `keys` slots, each storing `dataBits` of data. Allocating hands
+// out a free key and stores the supplied data under it; freeing a key
+// returns the data stored under it and marks the key free again.
+class CAM(keys: Int, dataBits: Int) extends Module
+{
+  val io = new Bundle {
+    // alloc.valid => allocate a key
+    // alloc.ready => a key is available
+    val alloc = Decoupled(UInt(width = dataBits)).flip
+    val key = UInt(OUTPUT, width = log2Ceil(keys))
+    // free.valid => release the key
+    val free = Valid(UInt(width = log2Ceil(keys))).flip
+    val data = UInt(OUTPUT, width = dataBits)
+  }
+
+  // One bit per key; reset value is all ones, i.e. every key starts free.
+  val free = RegInit(UInt((BigInt(1) << keys) - 1, width = keys))
+  val data = Mem(keys, UInt(width = dataBits))
+
+  // One-hot choice of the key to hand out next.
+  // NOTE(review): presumably leftOR sets every bit at or above the lowest set
+  // bit, which would make this the lowest free key -- confirm against
+  // freechips.rocketchip.util.leftOR.
+  val free_sel = ~(leftOR(free, keys) << 1) & free
+  io.key := OHToUInt(free_sel, keys)
+
+  io.alloc.ready := free.orR
+  when (io.alloc.fire()) { data.write(io.key, io.alloc.bits) }
+
+  // Support free in same cycle as alloc
+  // When the key being freed is the one being allocated this cycle, return
+  // the incoming data directly instead of reading the memory.
+  val bypass = io.alloc.fire() && io.free.bits === io.key
+  io.data := Mux(bypass, io.alloc.bits, data(io.free.bits))
+
+  // Update CAM usage
+  // `set` is OR-ed in after `clr` is masked out, so a key that is allocated
+  // and freed in the same cycle ends the cycle free.
+  val clr = Mux(io.alloc.fire(), free_sel, UInt(0))
+  val set = Mux(io.free.valid, UIntToOH(io.free.bits), UInt(0))
+  free := (free & ~clr) | set
+}
diff --git a/src/main/scala/devices/chiplink/ChipLink.scala b/src/main/scala/devices/chiplink/ChipLink.scala
new file mode 100644
index 0000000..6ccd5c3
--- /dev/null
+++ b/src/main/scala/devices/chiplink/ChipLink.scala
@@ -0,0 +1,207 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.config.{Field, Parameters}
+import freechips.rocketchip.diplomacy._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.devices.tilelink.TLBusBypass
+import freechips.rocketchip.util._
+
+// Top-level ChipLink bridge. It exposes a TileLink manager (behind a
+// TLBusBypass) for traffic leaving over the link, a TileLink client with one
+// entry per ChipLink domain for traffic arriving over the link, and the wide
+// data port that carries the serialized beats.
+class ChipLink(val params: ChipLinkParams)(implicit p: Parameters) extends LazyModule() {
+
+  val device = new SimpleBus("chiplink", Seq("sifive,chiplink"))
+
+  // Produces no manager at all for an empty address list, otherwise one.
+  private def maybeManager(x: Seq[AddressSet], f: Seq[AddressSet] => TLManagerParameters) =
+    if (x.isEmpty) Nil else Seq(f(x))
+
+  // Up to two managers: params.TLUH (no Acquire support declared) and
+  // params.TLC (additionally supports AcquireT/AcquireB of params.acqXfer).
+  private val slaveNode = TLManagerNode(Seq(TLManagerPortParameters(
+    managers =
+      maybeManager(params.TLUH, a => TLManagerParameters(
+        address = a,
+        resources = device.ranges,
+        regionType = RegionType.GET_EFFECTS,
+        executable = true,
+        supportsArithmetic = params.atomicXfer,
+        supportsLogical = params.atomicXfer,
+        supportsGet = params.fullXfer,
+        supportsPutFull = params.fullXfer,
+        supportsPutPartial = params.fullXfer,
+        supportsHint = params.fullXfer,
+        fifoId = Some(0))) ++
+      maybeManager(params.TLC, a => TLManagerParameters(
+        address = a,
+        resources = device.ranges,
+        regionType = RegionType.TRACKED,
+        executable = true,
+        supportsAcquireT = params.acqXfer,
+        supportsAcquireB = params.acqXfer,
+        supportsArithmetic = params.atomicXfer,
+        supportsLogical = params.atomicXfer,
+        supportsGet = params.fullXfer,
+        supportsPutFull = params.fullXfer,
+        supportsPutPartial = params.fullXfer,
+        supportsHint = params.fullXfer,
+        fifoId = Some(0))),
+    beatBytes = 4,
+    endSinkId = params.sinks,
+    minLatency = params.latency)))
+
+  // Masters 1+ require order; Master 0 is unordered and may cache
+  private val masterNode = TLClientNode(Seq(TLClientPortParameters(
+    clients = Seq.tabulate(params.domains) { i =>
+      TLClientParameters(
+        name = "ChipLink Domain #" + i,
+        sourceId = IdRange(i*params.sourcesPerDomain, (i + 1)*params.sourcesPerDomain),
+        requestFifo = i > 0,
+        supportsProbe = if (i == 0) params.fullXfer else params.noXfer) },
+    minLatency = params.latency)))
+
+  // The manager side is reached through a bus bypass, which the module below
+  // drives while either RX or TX is held in reset.
+  private val bypass = LazyModule(new TLBusBypass(beatBytes = 4))
+  slaveNode := bypass.node
+
+  val node = NodeHandle(bypass.node, masterNode)
+
+  // Exported memory map. Used when connecting VIP
+  lazy val managers = masterNode.edges.out(0).manager.managers
+  lazy val mmap = {
+    // Managers that support AcquireB go to TLC, all others to TLUH.
+    val (tlc, tluh) = managers.partition(_.supportsAcquireB)
+    params.copy(
+      TLUH = AddressSet.unify(tluh.flatMap(_.address)),
+      TLC = AddressSet.unify(tlc.flatMap(_.address)))
+  }
+
+  lazy val module = new LazyModuleImp(this) {
+    val io = IO(new Bundle {
+      val port = new WideDataLayerPort(params)
+      val bypass = Bool(OUTPUT)
+      // These are fed to port.c2b.{clk,rst} -- must be specified by creator
+      val c2b_clk = Clock(INPUT)
+      val c2b_rst = Bool(INPUT)
+    })
+
+    // Ensure downstream devices support our requirements
+    val (in, edgeIn) = slaveNode.in(0)
+    val (out, edgeOut) = masterNode.out(0)
+
+    require (edgeIn.manager.beatBytes == 4)
+    edgeOut.manager.requireFifo()
+
+    edgeOut.manager.managers.foreach { m =>
+      require (m.supportsGet.contains(params.fullXfer),
+        s"ChipLink requires ${m.name} support ${params.fullXfer} Get, not ${m.supportsGet}")
+      if (m.supportsPutFull) {
+        // Message names the operation actually checked (PutFull).
+        require (m.supportsPutFull.contains(params.fullXfer),
+          s"ChipLink requires ${m.name} support ${params.fullXfer} PutFull, not ${m.supportsPutFull}")
+        // !!! argh. AHB devices can't: require (m.supportsPutPartial.contains(params.fullXfer),
+        //  s"ChipLink requires ${m.name} support ${params.fullXfer} PutPartial not ${m.supportsPutPartial}")
+        require (m.supportsArithmetic.contains(params.atomicXfer),
+          s"ChipLink requires ${m.name} support ${params.atomicXfer} Arithmetic, not ${m.supportsArithmetic}")
+        require (m.supportsLogical.contains(params.atomicXfer),
+          s"ChipLink requires ${m.name} support ${params.atomicXfer} Logical, not ${m.supportsLogical}")
+      }
+      require (m.supportsHint.contains(params.fullXfer),
+        s"ChipLink requires ${m.name} support ${params.fullXfer} Hint, not ${m.supportsHint}")
+      require (!m.supportsAcquireT || m.supportsAcquireT.contains(params.acqXfer),
+        s"ChipLink requires ${m.name} support ${params.acqXfer} AcquireT, not ${m.supportsAcquireT}")
+      require (!m.supportsAcquireB || m.supportsAcquireB.contains(params.acqXfer),
+        s"ChipLink requires ${m.name} support ${params.acqXfer} AcquireB, not ${m.supportsAcquireB}")
+      require (!m.supportsAcquireB || !m.supportsPutFull || m.supportsAcquireT,
+        s"ChipLink requires ${m.name} to support AcquireT if it supports Put and AcquireB")
+    }
+
+    // Anything that is optional, must be supported by the error device (for redirect)
+    // The error device is recognised by the class name of its LazyModule.
+    val errorDevs = edgeOut.manager.managers.filter(_.nodePath.last.lazyModule.className == "TLError")
+    require (!errorDevs.isEmpty, "There is no TLError reachable from ChipLink. One must be instantiated.")
+    val errorDev = errorDevs.head
+    require (errorDev.supportsPutFull.contains(params.fullXfer),
+      s"ChipLink requires ${errorDev.name} support ${params.fullXfer} PutFull, not ${errorDev.supportsPutFull}")
+    require (errorDev.supportsPutPartial.contains(params.fullXfer),
+      s"ChipLink requires ${errorDev.name} support ${params.fullXfer} PutPartial not ${errorDev.supportsPutPartial}")
+    require (errorDev.supportsArithmetic.contains(params.atomicXfer),
+      s"ChipLink requires ${errorDev.name} support ${params.atomicXfer} Arithmetic, not ${errorDev.supportsArithmetic}")
+    require (errorDev.supportsLogical.contains(params.atomicXfer),
+      s"ChipLink requires ${errorDev.name} support ${params.atomicXfer} Logical, not ${errorDev.supportsLogical}")
+    require (errorDev.supportsAcquireT.contains(params.acqXfer),
+      s"ChipLink requires ${errorDev.name} support ${params.acqXfer} AcquireT, not ${errorDev.supportsAcquireT}")
+
+    // At most one cache can master ChipLink
+    require (edgeIn.client.clients.filter(_.supportsProbe).size <= 1,
+      s"ChipLink supports at most one caching master, ${edgeIn.client.clients.filter(_.supportsProbe).map(_.name)}")
+
+    // Construct the info needed by all submodules
+    // The last argument is the base of the error device's first address set.
+    val info = ChipLinkInfo(params, edgeIn, edgeOut, errorDev.address.head.base)
+
+    val sinkA = Module(new SinkA(info))
+    val sinkB = Module(new SinkB(info))
+    val sinkC = Module(new SinkC(info))
+    val sinkD = Module(new SinkD(info))
+    val sinkE = Module(new SinkE(info))
+    val sourceA = Module(new SourceA(info))
+    val sourceB = Module(new SourceB(info))
+    val sourceC = Module(new SourceC(info))
+    val sourceD = Module(new SourceD(info))
+    val sourceE = Module(new SourceE(info))
+
+    // RX is clocked and reset by the incoming (b2c) lane; its per-channel
+    // outputs cross into this module's clock domain via FromAsyncBundle.
+    val rx = Module(new RX(info))
+    rx.clock := io.port.b2c.clk
+    rx.reset := io.port.b2c.rst
+    rx.io.b2c_data := io.port.b2c.data
+    rx.io.b2c_send := io.port.b2c.send
+    out.a <> sourceA.io.a
+    in .b <> sourceB.io.b
+    out.c <> sourceC.io.c
+    in .d <> sourceD.io.d
+    out.e <> sourceE.io.e
+    sourceA.io.q <> FromAsyncBundle(rx.io.a)
+    sourceB.io.q <> FromAsyncBundle(rx.io.b)
+    sourceC.io.q <> FromAsyncBundle(rx.io.c)
+    sourceD.io.q <> FromAsyncBundle(rx.io.d)
+    sourceE.io.q <> FromAsyncBundle(rx.io.e)
+
+    // TX drives the outgoing (c2b) lane. With params.syncTX it stays on this
+    // module's clock and takes the sink queues directly; otherwise it runs on
+    // the c2b clock/reset and is fed through async crossings.
+    val tx = Module(new TX(info))
+    io.port.c2b.data := tx.io.c2b_data
+    io.port.c2b.send := tx.io.c2b_send
+    sinkA.io.a <> in .a
+    sinkB.io.b <> out.b
+    sinkC.io.c <> in .c
+    sinkD.io.d <> out.d
+    sinkE.io.e <> in .e
+    if (params.syncTX) {
+      tx.io.sa <> sinkA.io.q
+      tx.io.sb <> sinkB.io.q
+      tx.io.sc <> sinkC.io.q
+      tx.io.sd <> sinkD.io.q
+      tx.io.se <> sinkE.io.q
+    } else {
+      tx.clock := io.port.c2b.clk
+      tx.reset := io.port.c2b.rst
+      tx.io.a <> ToAsyncBundle(sinkA.io.q, params.crossingDepth)
+      tx.io.b <> ToAsyncBundle(sinkB.io.q, params.crossingDepth)
+      tx.io.c <> ToAsyncBundle(sinkC.io.q, params.crossingDepth)
+      tx.io.d <> ToAsyncBundle(sinkD.io.q, params.crossingDepth)
+      tx.io.e <> ToAsyncBundle(sinkE.io.q, params.crossingDepth)
+    }
+
+    // Pass credits from RX to TX
+    tx.io.rxc <> rx.io.rxc
+    tx.io.txc <> rx.io.txc
+
+    // Connect the CAM source pools
+    sinkD.io.a_clSource := sourceA.io.d_clSource
+    sourceA.io.d_tlSource := sinkD.io.a_tlSource
+    sinkD.io.c_clSource := sourceC.io.d_clSource
+    sourceC.io.d_tlSource := sinkD.io.c_tlSource
+    sourceD.io.e_tlSink := sinkE.io.d_tlSink
+    sinkE.io.d_clSink := sourceD.io.e_clSink
+
+    // Create the TX clock domain from input
+    io.port.c2b.clk := io.c2b_clk
+    io.port.c2b.rst := io.c2b_rst
+
+    // Disable ChipLink while RX+TX are in reset
+    // Both resets are synchronized into this module's clock before use.
+    val do_bypass = ResetCatchAndSync(clock, rx.reset) || ResetCatchAndSync(clock, tx.reset)
+    bypass.module.io.bypass := do_bypass
+    io.bypass := do_bypass
+  }
+}
diff --git a/src/main/scala/devices/chiplink/Parameters.scala b/src/main/scala/devices/chiplink/Parameters.scala
new file mode 100644
index 0000000..190fc2a
--- /dev/null
+++ b/src/main/scala/devices/chiplink/Parameters.scala
@@ -0,0 +1,137 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.config.{Field, Parameters}
+import freechips.rocketchip.diplomacy._
+import freechips.rocketchip.tilelink._
+
+// Static configuration of one ChipLink instance.
+//   TLUH / TLC  - address sets served by the two manager kinds built in ChipLink
+//   sourceBits  - log2 of the number of TileLink source IDs (at least log2(domains))
+//   sinkBits    - log2 of the number of TileLink sink IDs
+//   syncTX      - when true, ChipLink keeps TX on its own clock instead of the c2b clock
+case class ChipLinkParams(TLUH: Seq[AddressSet], TLC: Seq[AddressSet], sourceBits: Int = 6, sinkBits: Int = 5, syncTX: Boolean = false)
+{
+  val domains = 8 // hard-wired into chiplink protocol
+  require (sourceBits >= log2Ceil(domains))
+  require (sinkBits >= 0)
+  val sources = 1 << sourceBits
+  val sinks = 1 << sinkBits
+  val sourcesPerDomain = sources / domains
+  val latency = 8 // ChipLink has at least 4 cycles of synchronization per side
+  val dataBytes = 4
+  val dataBits = dataBytes*8
+  val clSourceBits = 16
+  val clSinkBits = 16
+  val crossingDepth = 8
+  // Depth of each receive queue, in beats (8192 bytes of dataBytes-wide beats).
+  val Qdepth = 8192 / dataBytes
+  val maxXfer = 4096
+  val xferBits = log2Ceil(maxXfer)
+  val creditBits = 20 // use saturating addition => we can exploit at most 1MB of buffers
+  val addressBits = 64
+  // A credit counter must be able to hold a completely full queue.
+  require (log2Ceil(Qdepth + 1) <= creditBits)
+
+  // Protocol supported operations:
+  val noXfer = TransferSizes.none
+  val fullXfer = TransferSizes(1, 64) // !!! 4096)
+  val acqXfer = TransferSizes(64, 64)
+  val atomicXfer = TransferSizes(1, 8)
+
+}
+
+// Configuration key holding the parameters of every ChipLink instance.
+case object ChipLinkKey extends Field[Seq[ChipLinkParams]]
+
+// A ChipLink transaction identifier: ordering domain plus source within it.
+case class TXN(domain: Int, source: Int)
+// Everything the ChipLink submodules share: the parameters, both TileLink
+// edges, and the address (errorDev) that illegal requests are redirected to.
+case class ChipLinkInfo(params: ChipLinkParams, edgeIn: TLEdge, edgeOut: TLEdge, errorDev: BigInt)
+{
+  // TL source => CL TXN
+  // Clients with requestFifo get domains 1, 2, ... assigned round-robin
+  // (wrapping back to 1 after params.domains - 1); all other clients share
+  // domain 0. Within a domain, sources are numbered consecutively in the
+  // order the clients are visited. The mapping is also printed.
+  val sourceMap: Map[Int, TXN] = {
+    var alloc = 1
+    val domains = Array.fill(params.domains) { 0 }
+    println("ChipLink source mapping CLdomain CLsource <= TLsource:")
+    val out = Map() ++ edgeIn.client.clients.flatMap { c =>
+      // If the client needs order, pick a domain for it
+      val domain = if (c.requestFifo) alloc else 0
+      val offset = domains(domain)
+      println(s"\t${domain} [${offset}, ${offset + c.sourceId.size}) <= [${c.sourceId.start}, ${c.sourceId.end}):\t${c.name}")
+      if (c.requestFifo) {
+        alloc = alloc + 1
+        if (alloc == params.domains) alloc = 1
+      }
+      c.sourceId.range.map { id =>
+        val source = domains(domain)
+        domains(domain) = source + 1
+        (id, TXN(domain, source))
+      }
+    }
+    println("")
+    out
+  }
+
+  // Turns an Int => Int map into a lookup table indexed by key. Entries for
+  // keys absent from m are never assigned here. m must be non-empty, since
+  // the sizes come from m.keys.max and m.values.max.
+  def mux(m: Map[Int, Int]): Vec[UInt] = {
+    val maxKey = m.keys.max
+    val maxVal = m.values.max
+    val valBits = log2Up(maxVal + 1)
+    val out = Wire(Vec(maxKey + 1, UInt(width = valBits)))
+    m.foreach { case (k, v) => out(k) := UInt(v, width = valBits) }
+    out
+  }
+
+  // Packet format; little-endian
+  // Header bit layout, LSB first: format [2:0], opcode [5:3], param [8:6],
+  // size [12:9], domain [15:13], source [31:16].
+  def encode(format: UInt, opcode: UInt, param: UInt, size: UInt, domain: UInt, source: UInt): UInt = {
+    // Force x to exactly w bits: widen by OR-ing with a w-bit zero, then slice.
+    def fmt(x: UInt, w: Int) = (x | UInt(0, width=w))(w-1, 0)
+    Cat(
+      fmt(source, 16),
+      fmt(domain, 3),
+      fmt(size, 4),
+      fmt(param, 3),
+      fmt(opcode, 3),
+      fmt(format, 3))
+  }
+
+  // Inverse of encode: returns Seq(format, opcode, param, size, domain, source).
+  def decode(x: UInt): Seq[UInt] = {
+    val format = x( 2, 0)
+    val opcode = x( 5, 3)
+    val param = x( 8, 6)
+    val size = x(12, 9)
+    val domain = x(15, 13)
+    val source = x(31, 16)
+    Seq(format, opcode, param, size, domain, source)
+  }
+
+  // Number of dataBytes-wide beats for a transfer of 2^size bytes: 1 when
+  // size <= log2(dataBytes), otherwise 2^(size - log2(dataBytes)).
+  def size2beats(size: UInt): UInt = {
+    val shift = log2Ceil(params.dataBytes)
+    Cat(UIntToOH(size|UInt(0, width=4), params.xferBits + 1) >> (shift + 1), size <= UInt(shift))
+  }
+
+  // Same computation with shift = log2(dataBytes*8): 1 when size <= shift,
+  // otherwise 2^(size - shift). beats1 and SinkA/SinkB add this on top of the
+  // data beats for PutPartialData.
+  def mask2beats(size: UInt): UInt = {
+    val shift = log2Ceil(params.dataBytes*8)
+    Cat(UIntToOH(size|UInt(0, width=4), params.xferBits + 1) >> (shift + 1), size <= UInt(shift))
+  }
+
+  // Number of beats that follow the header beat x, selected by the header's
+  // format field (or by forceFormat when given). Formats 0 and 1 add two
+  // beats plus data and partial-mask beats, format 2 adds two beats plus
+  // data, format 3 adds data plus one beat for Grant/GrantData, and formats
+  // 4 and 5 add none.
+  def beats1(x: UInt, forceFormat: Option[UInt] = None): UInt = {
+    val Seq(format, opcode, _, size, _, _) = decode(x)
+    val beats = size2beats(size)
+    val masks = mask2beats(size)
+    val grant = opcode === TLMessages.Grant || opcode === TLMessages.GrantData
+    val partial = opcode === TLMessages.PutPartialData
+    val a = Mux(opcode(2), UInt(0), beats) + UInt(2) + Mux(partial, masks, UInt(0))
+    val b = Mux(opcode(2), UInt(0), beats) + UInt(2) + Mux(partial, masks, UInt(0))
+    val c = Mux(opcode(0), beats, UInt(0)) + UInt(2)
+    val d = Mux(opcode(0), beats, UInt(0)) + grant.asUInt
+    val e = UInt(0)
+    val f = UInt(0)
+    Vec(a, b, c, d, e, f)(forceFormat.getOrElse(format))
+  }
+
+  // Tracks packet boundaries on a beat stream. Returns (first, last): first
+  // is high while no packet is in progress, last is high on the final beat.
+  // The counter is loaded from beats1 on the first beat and counts down on
+  // every later beat that fires.
+  def firstlast(x: DecoupledIO[UInt], forceFormat: Option[UInt] = None): (Bool, Bool) = {
+    val count = RegInit(UInt(0))
+    val beats = beats1(x.bits, forceFormat)
+    val first = count === UInt(0)
+    val last = count === UInt(1) || (first && beats === UInt(0))
+    when (x.fire()) { count := Mux(first, beats, count - UInt(1)) }
+    (first, last)
+  }
+
+  // You can't just unilaterally use error, because this would misalign the mask
+  // Keeps the low log2(maxXfer) bits of address and, when the request is not
+  // legal, replaces the bits above them with those of errorDev.
+  def makeError(legal: Bool, address: UInt): UInt =
+    Cat(
+      Mux(legal, address, UInt(errorDev))(params.addressBits-1, log2Ceil(params.maxXfer)),
+      address(log2Ceil(params.maxXfer)-1, 0))
+}
diff --git a/src/main/scala/devices/chiplink/Partial.scala b/src/main/scala/devices/chiplink/Partial.scala
new file mode 100644
index 0000000..4633442
--- /dev/null
+++ b/src/main/scala/devices/chiplink/Partial.scala
@@ -0,0 +1,106 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util._
+
+// Splits a PutPartialData stream in which every byte travels as a 9-bit
+// group (mask bit in the LSB, data byte above it) back into separate data
+// and mask fields. Other opcodes pass through unchanged. Only TLBundleA and
+// TLBundleB are handled; any other bundle type fails the match below.
+// NOTE(review): the class name is spelled "Parital" (sic); it is left as-is
+// here because renaming would change the public name.
+class ParitalExtractor[T <: TLDataChannel](gen: T) extends Module
+{
+  val io = new Bundle {
+    val last = Bool(INPUT)
+    val i = Decoupled(gen).flip
+    val o = Decoupled(gen)
+  }
+
+  // Default is a straight pass-through; the when-block below overrides
+  // individual signals for PutPartialData.
+  io.o <> io.i
+
+  // Grab references to the fields we care about
+  val (i_opcode, i_data) = io.i.bits match {
+    case a: TLBundleA => (a.opcode, a.data)
+    case b: TLBundleB => (b.opcode, b.data)
+  }
+  val (o_data, o_mask) = io.o.bits match {
+    case a: TLBundleA => (a.data, a.mask)
+    case b: TLBundleB => (b.data, b.mask)
+  }
+
+  val state = RegInit(UInt(0, width=4)) // number of nibbles; [0,8]
+  val shift = Reg(UInt(width=32))
+  val enable = i_opcode === TLMessages.PutPartialData
+  val empty = state === UInt(0)
+
+  when (enable) {
+    // Place the incoming word above the `state` nibbles already held.
+    val wide = shift | (i_data << (state << 2))
+    // Group i occupies bits [9i+8 : 9i]: bit 9i is the mask, the rest the byte.
+    o_data := Vec.tabulate(4) { i => wide(9*(i+1)-1, 9*i+1) } .asUInt
+    o_mask := Vec.tabulate(4) { i => wide(9*i) } .asUInt
+
+    // Swallow beat if we have no nibbles
+    when (empty) {
+      io.i.ready := Bool(true)
+      io.o.valid := Bool(false)
+    }
+
+    // Update the FSM
+    // Each output beat uses 36 bits, so one nibble fewer is held afterwards.
+    // A swallowed beat reloads 8 nibbles; io.last clears the state.
+    when (io.i.fire()) {
+      shift := Mux(empty, i_data, wide >> 36)
+      state := state - UInt(1)
+      when (empty) { state := UInt(8) }
+      when (io.last) { state := UInt(0) }
+    }
+  }
+}
+
+// Packs the data and mask of a PutPartialData stream into 9-bit groups
+// (mask bit in the LSB, data byte above it) and emits them 32 bits at a
+// time, inserting extra beats for the accumulated overflow. Other opcodes
+// pass through unchanged. Only TLBundleA and TLBundleB are handled.
+class PartialInjector[T <: TLDataChannel](gen: T) extends Module
+{
+  val io = new Bundle {
+    val i_last = Bool(INPUT)
+    val o_last = Bool(OUTPUT)
+    val i = Decoupled(gen).flip
+    val o = Decoupled(gen)
+  }
+
+  // Default is a straight pass-through; the when-block below overrides
+  // individual signals for PutPartialData.
+  io.o <> io.i
+
+  // Grab references to the fields we care about
+  val (i_opcode, i_data, i_mask) = io.i.bits match {
+    case a: TLBundleA => (a.opcode, a.data, a.mask)
+    case b: TLBundleB => (b.opcode, b.data, b.mask)
+  }
+  val o_data = io.o.bits match {
+    case a: TLBundleA => a.data
+    case b: TLBundleB => b.data
+  }
+
+  val state = RegInit(UInt(0, width=4)) // number of nibbles; [0,8]
+  val shift = RegInit(UInt(0, width=32))
+  val full = state(3)
+  val partial = i_opcode === TLMessages.PutPartialData
+
+  // Set for the one extra beat emitted after the input's last beat.
+  val last = RegInit(Bool(false))
+  io.o_last := Mux(partial, last, io.i_last)
+
+  when (partial) {
+    val bytes = Seq.tabulate(4) { i => i_data(8*(i+1)-1, 8*i) }
+    val bits = i_mask.toBools
+    // Interleave to mask0, byte0, mask1, byte1, ... with mask0 as the LSB.
+    val mixed = Cat(Seq(bits, bytes).transpose.flatten.reverse)
+    // Place the 36 new bits above the `state` nibbles already held.
+    val wide = shift | (mixed << (state << 2))
+    o_data := wide
+
+    // Inject a beat
+    // Hold the input while an extra output beat drains the held nibbles:
+    // either 8 nibbles have accumulated or the input is on its last beat.
+    when ((io.i_last || full) && !last) {
+      io.i.ready := Bool(false)
+    }
+
+    // Update the FSM
+    // Each output beat emits 32 of the 36 bits, so one more nibble is held.
+    when (io.o.fire()) {
+      shift := wide >> 32
+      state := state + UInt(1)
+      when (full || last) {
+        state := UInt(0)
+        shift := UInt(0)
+      }
+      last := io.i_last && !last
+    }
+  }
+}
diff --git a/src/main/scala/devices/chiplink/RX.scala b/src/main/scala/devices/chiplink/RX.scala
new file mode 100644
index 0000000..6acc629
--- /dev/null
+++ b/src/main/scala/devices/chiplink/RX.scala
@@ -0,0 +1,90 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util._
+
+// Receive side of the link. Registers the incoming b2c beats, sorts them
+// into one queue per channel by the format field of each packet's header,
+// exposes each queue through an async crossing, and accumulates the credit
+// updates that are handed to TX through rxc/txc.
+class RX(info: ChipLinkInfo) extends Module
+{
+  val io = new Bundle {
+    val b2c_send = Bool(INPUT)
+    val b2c_data = UInt(INPUT, info.params.dataBits)
+    val a = new AsyncBundle(info.params.crossingDepth, UInt(width = info.params.dataBits))
+    val b = new AsyncBundle(info.params.crossingDepth, UInt(width = info.params.dataBits))
+    val c = new AsyncBundle(info.params.crossingDepth, UInt(width = info.params.dataBits))
+    val d = new AsyncBundle(info.params.crossingDepth, UInt(width = info.params.dataBits))
+    val e = new AsyncBundle(info.params.crossingDepth, UInt(width = info.params.dataBits))
+    val rxc = new AsyncBundle(1, new CreditBump(info.params))
+    val txc = new AsyncBundle(1, new CreditBump(info.params))
+  }
+
+  // Immediately register our input data
+  // Two register stages each; the send flag resets to false.
+  val b2c_data = RegNext(RegNext(io.b2c_data))
+  val b2c_send = RegNext(RegNext(io.b2c_send, Bool(false)), Bool(false))
+
+  // Fit b2c into the firstlast API
+  // The link has no back-pressure here, so the beat is always accepted.
+  val beat = Wire(Decoupled(UInt(width = info.params.dataBits)))
+  beat.bits := b2c_data
+  beat.valid := b2c_send
+  beat.ready := Bool(true)
+
+  // Select the correct HellaQueue for the request
+  // The format is read from the first beat of a packet and held in a
+  // register for the remaining beats.
+  val (first, _) = info.firstlast(beat)
+  val formatBits = beat.bits(2, 0)
+  val formatValid = beat.fire() && first
+  val format = Mux(formatValid, formatBits, RegEnable(formatBits, formatValid))
+  val formatOH = UIntToOH(format)
+
+  // Create the receiver buffers
+  val hqa = Module(new HellaQueue(info.params.Qdepth)(beat.bits))
+  val hqb = Module(new HellaQueue(info.params.Qdepth)(beat.bits))
+  val hqc = Module(new HellaQueue(info.params.Qdepth)(beat.bits))
+  val hqd = Module(new HellaQueue(info.params.Qdepth)(beat.bits))
+  val hqe = Module(new HellaQueue(info.params.Qdepth)(beat.bits))
+
+  // Use these to save some typing; function to prevent renaming
+  private def hqX = Seq(hqa, hqb, hqc, hqd, hqe)
+  private def ioX = Seq(io.a, io.b, io.c, io.d, io.e)
+
+  // Enqueue to the HellaQueues
+  // zip stops after five entries, so formats 0-4 map to queues a-e.
+  (formatOH.toBools zip hqX) foreach { case (sel, hq) =>
+    hq.io.enq.valid := beat.valid && sel
+    hq.io.enq.bits := beat.bits
+    assert (!hq.io.enq.valid || hq.io.enq.ready) // overrun impossible
+  }
+
+  // Send HellaQueue output to their respective FSMs
+  (hqX zip ioX) foreach { case (hq, io) =>
+    io <> ToAsyncBundle(hq.io.deq, info.params.crossingDepth)
+  }
+
+  // Credits we need to hand-off to the TX FSM
+  // tx starts at zero; rx starts at Qdepth on every channel.
+  val tx = RegInit(CreditBump(info.params, 0))
+  val rx = RegInit(CreditBump(info.params, info.params.Qdepth))
+
+  // Constantly transmit credit updates
+  val txOut = Wire(Decoupled(new CreditBump(info.params)))
+  val rxOut = Wire(Decoupled(new CreditBump(info.params)))
+  txOut.valid := Bool(true)
+  rxOut.valid := Bool(true)
+  txOut.bits := tx
+  rxOut.bits := rx
+  io.txc <> ToAsyncBundle(txOut, 1)
+  io.rxc <> ToAsyncBundle(rxOut, 1)
+
+  // Generate new RX credits as the HellaQueues drain
+  // One credit per channel for every beat dequeued this cycle.
+  val rxInc = Wire(new CreditBump(info.params))
+  (hqX zip rxInc.X) foreach { case (hq, inc) =>
+    inc := hq.io.deq.fire().asUInt
+  }
+
+  // Generate new TX credits as we receive F-format messages
+  val txInc = Mux(beat.valid && formatOH(5), CreditBump(info.params, beat.bits), CreditBump(info.params, 0))
+
+  // As we hand-over credits, reset the counters
+  // The when-blocks come last, so on a hand-over the counter restarts from
+  // this cycle's increment instead of accumulating.
+  tx := tx + txInc
+  rx := rx + rxInc
+  when (txOut.fire()) { tx := txInc }
+  when (rxOut.fire()) { rx := rxInc }
+}
diff --git a/src/main/scala/devices/chiplink/SinkA.scala b/src/main/scala/devices/chiplink/SinkA.scala
new file mode 100644
index 0000000..532d1fd
--- /dev/null
+++ b/src/main/scala/devices/chiplink/SinkA.scala
@@ -0,0 +1,65 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+
+// Serializes TileLink A-channel messages into format-0 ChipLink beats: one
+// header beat, two address beats, then the data beats if the message has any.
+class SinkA(info: ChipLinkInfo) extends Module
+{
+  val io = new Bundle {
+    val a = Decoupled(new TLBundleA(info.edgeIn.bundle)).flip
+    val q = Decoupled(new DataLayer(info.params))
+  }
+
+  // Map TileLink sources to ChipLink sources+domain
+  val tl2cl = info.sourceMap
+  val source = info.mux(tl2cl.mapValues(_.source))
+  val domain = info.mux(tl2cl.mapValues(_.domain))
+
+  // We need a Q because we stall the channel while serializing its header
+  val inject = Module(new PartialInjector(io.a.bits))
+  inject.io.i <> Queue(io.a, 1, flow=true)
+  inject.io.i_last := info.edgeIn.last(inject.io.i)
+  val a = inject.io.o
+  val a_last = inject.io.o_last
+  val a_hasData = info.edgeIn.hasData(a.bits)
+  val a_partial = a.bits.opcode === TLMessages.PutPartialData
+
+  // A simple FSM to generate the packet components
+  // The state value doubles as the index into the output Vec further down.
+  val state = RegInit(UInt(0, width = 2))
+  val s_header = UInt(0, width = 2)
+  val s_address0 = UInt(1, width = 2)
+  val s_address1 = UInt(2, width = 2)
+  val s_data = UInt(3, width = 2)
+
+  when (io.q.fire()) {
+    switch (state) {
+      is (s_header) { state := s_address0 }
+      is (s_address0) { state := s_address1 }
+      is (s_address1) { state := Mux(a_hasData, s_data, s_header) }
+      is (s_data) { state := Mux(!a_last, s_data, s_header) }
+    }
+  }
+
+  // Construct the header beat
+  val header = info.encode(
+    format = UInt(0),
+    opcode = a.bits.opcode,
+    param = a.bits.param,
+    size = a.bits.size,
+    domain = domain(a.bits.source),
+    source = source(a.bits.source))
+
+  // Construct the address beats
+  val address0 = a.bits.address
+  val address1 = a.bits.address >> 32
+
+  // Frame the output packet
+  // The input beat is only consumed in the last state of the packet, so it
+  // stays stable on `a` while the header and address beats are emitted.
+  val isLastState = state === Mux(a_hasData, s_data, s_address1)
+  a.ready := io.q.ready && isLastState
+  io.q.valid := a.valid
+  io.q.bits.last := a_last && isLastState
+  io.q.bits.data := Vec(header, address0, address1, a.bits.data)(state)
+  // Total packet length: 3 (header + two address beats) plus the data beats
+  // plus the extra beats needed for the PutPartialData mask.
+  io.q.bits.beats := Mux(a_hasData, info.size2beats(a.bits.size), UInt(0)) + UInt(3) +
+                     Mux(a_partial, info.mask2beats(a.bits.size), UInt(0))
+}
diff --git a/src/main/scala/devices/chiplink/SinkB.scala b/src/main/scala/devices/chiplink/SinkB.scala
new file mode 100644
index 0000000..6142a6b
--- /dev/null
+++ b/src/main/scala/devices/chiplink/SinkB.scala
@@ -0,0 +1,62 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+
+class SinkB(info: ChipLinkInfo) extends Module // Serializes TileLink B messages onto io.q as header, address0, address1, then data beats
+{
+  val io = new Bundle {
+    val b = Decoupled(new TLBundleB(info.edgeOut.bundle)).flip // TileLink B channel in
+    val q = Decoupled(new DataLayer(info.params)) // serialized beats out
+  }
+
+  // We need a Q because we stall the channel while serializing its header
+  val inject = Module(new PartialInjector(io.b.bits))
+  inject.io.i <> Queue(io.b, 1, flow=true)
+  inject.io.i_last := info.edgeOut.last(inject.io.i)
+  val b = inject.io.o
+  val b_last = inject.io.o_last
+  val b_hasData = info.edgeOut.hasData(b.bits)
+  val b_partial = b.bits.opcode === TLMessages.PutPartialData
+
+  // A simple FSM to generate the packet components (the state value doubles as the beat selector below)
+  val state = RegInit(UInt(0, width = 2))
+  val s_header = UInt(0, width = 2)
+  val s_address0 = UInt(1, width = 2)
+  val s_address1 = UInt(2, width = 2)
+  val s_data = UInt(3, width = 2)
+
+  when (io.q.fire()) { // advance only when an output beat is accepted
+    switch (state) {
+      is (s_header) { state := s_address0 }
+      is (s_address0) { state := s_address1 }
+      is (s_address1) { state := Mux(b_hasData, s_data, s_header) } // messages without data end after address1
+      is (s_data) { state := Mux(!b_last, s_data, s_header) } // stay here until the final data beat
+    }
+  }
+
+  // Construct the header beat
+  val header = info.encode(
+    format = UInt(1), // format 1 marks a B-channel packet
+    opcode = b.bits.opcode,
+    param = b.bits.param,
+    size = b.bits.size,
+    domain = UInt(0), // ChipLink only allows one remote cache, in domain 0
+    source = UInt(0))
+
+  assert (!b.valid || b.bits.source === UInt(0)) // the header always sends source 0, so any other source would be lost
+
+  // Construct the address beats
+  val address0 = b.bits.address
+  val address1 = b.bits.address >> 32 // address bits 32 and up
+
+  // Frame the output packet; beats = header + 2 address beats (the UInt(3)) + data beats + the PutPartialData term
+  val isLastState = state === Mux(b_hasData, s_data, s_address1)
+  b.ready := io.q.ready && isLastState // hold the input beat while the header/address beats are sent
+  io.q.valid := b.valid
+  io.q.bits.last := b_last && isLastState
+  io.q.bits.data := Vec(header, address0, address1, b.bits.data)(state)
+  io.q.bits.beats := Mux(b_hasData, info.size2beats(b.bits.size), UInt(0)) + UInt(3) +
+    Mux(b_partial, info.mask2beats(b.bits.size), UInt(0)) // NOTE(review): presumably the beats that carry the byte mask - confirm in ChipLinkInfo
+}
diff --git a/src/main/scala/devices/chiplink/SinkC.scala b/src/main/scala/devices/chiplink/SinkC.scala
new file mode 100644
index 0000000..0be1798
--- /dev/null
+++ b/src/main/scala/devices/chiplink/SinkC.scala
@@ -0,0 +1,63 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+
+class SinkC(info: ChipLinkInfo) extends Module // Serializes TileLink C messages onto io.q as header, address0, address1, then data beats
+{
+  val io = new Bundle {
+    val c = Decoupled(new TLBundleC(info.edgeIn.bundle)).flip // TileLink C channel in
+    val q = Decoupled(new DataLayer(info.params)) // serialized beats out
+  }
+
+  // Map TileLink sources to ChipLink sources+domain
+  val tl2cl = info.sourceMap
+  val source = info.mux(tl2cl.mapValues(_.source))
+  val domain = info.mux(tl2cl.mapValues(_.domain))
+
+  // We need a Q because we stall the channel while serializing its header
+  val c = Queue(io.c, 1, flow=true)
+  val c_last = info.edgeIn.last(c)
+  val c_hasData = info.edgeIn.hasData(c.bits)
+  val c_release = c.bits.opcode === TLMessages.Release || c.bits.opcode === TLMessages.ReleaseData
+
+  // A simple FSM to generate the packet components (the state value doubles as the beat selector below)
+  val state = RegInit(UInt(0, width = 2))
+  val s_header = UInt(0, width = 2)
+  val s_address0 = UInt(1, width = 2)
+  val s_address1 = UInt(2, width = 2)
+  val s_data = UInt(3, width = 2)
+
+  when (io.q.fire()) { // advance only when an output beat is accepted
+    switch (state) {
+      is (s_header) { state := s_address0 }
+      is (s_address0) { state := s_address1 }
+      is (s_address1) { state := Mux(c_hasData, s_data, s_header) } // messages without data end after address1
+      is (s_data) { state := Mux(!c_last, s_data, s_header) } // stay here until the final data beat
+    }
+  }
+
+  // Construct the header beat
+  val header = info.encode(
+    format = UInt(2), // format 2 marks a C-channel packet
+    opcode = c.bits.opcode,
+    param = c.bits.param,
+    size = c.bits.size,
+    domain = UInt(0), // only caches (unordered) can release
+    source = Mux(c_release, source(c.bits.source), UInt(0))) // only Release/ReleaseData carry a source; other opcodes send 0
+
+  assert (!c.valid || domain(c.bits.source) === UInt(0)) // the header hard-codes domain 0, so the sender must map to it
+
+  // Construct the address beats
+  val address0 = c.bits.address
+  val address1 = c.bits.address >> 32 // address bits 32 and up
+
+  // Frame the output packet; beats = header + 2 address beats (the UInt(3)) + data beats
+  val isLastState = state === Mux(c_hasData, s_data, s_address1)
+  c.ready := io.q.ready && isLastState // hold the input beat while the header/address beats are sent
+  io.q.valid := c.valid
+  io.q.bits.last := c_last && isLastState
+  io.q.bits.data := Vec(header, address0, address1, c.bits.data)(state)
+  io.q.bits.beats := Mux(c_hasData, info.size2beats(c.bits.size), UInt(0)) + UInt(3)
+}
diff --git a/src/main/scala/devices/chiplink/SinkD.scala b/src/main/scala/devices/chiplink/SinkD.scala
new file mode 100644
index 0000000..de12788
--- /dev/null
+++ b/src/main/scala/devices/chiplink/SinkD.scala
@@ -0,0 +1,60 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+
+class SinkD(info: ChipLinkInfo) extends Module // Serializes TileLink D messages onto io.q as header, a sink beat (grants only), then data beats
+{
+  val io = new Bundle {
+    val d = Decoupled(new TLBundleD(info.edgeOut.bundle)).flip // TileLink D channel in
+    val q = Decoupled(new DataLayer(info.params)) // serialized beats out
+    val a_tlSource = Valid(UInt(width = info.params.sourceBits)) // TL source of a non-ReleaseAck response
+    val a_clSource = UInt(INPUT, width = info.params.clSourceBits) // ChipLink source supplied in return for a_tlSource
+    val c_tlSource = Valid(UInt(width = info.params.sourceBits)) // TL source of a ReleaseAck
+    val c_clSource = UInt(INPUT, width = info.params.clSourceBits) // ChipLink source supplied in return for c_tlSource
+  }
+
+  // The FSM states (the state value doubles as the beat selector below)
+  val state = RegInit(UInt(0, width = 2))
+  val s_header = UInt(0, width = 2)
+  val s_sink = UInt(1, width = 2)
+  val s_data = UInt(2, width = 2)
+
+  // We need a Q because we stall the channel while serializing its header
+  val d = Queue(io.d, 1, flow=true)
+  val d_last = info.edgeOut.last(d)
+  val d_hasData = info.edgeOut.hasData(d.bits)
+  val d_grant = d.bits.opcode === TLMessages.Grant || d.bits.opcode === TLMessages.GrantData
+
+  when (io.q.fire()) { // advance only when an output beat is accepted
+    switch (state) {
+      is (s_header) { state := Mux(d_grant, s_sink, Mux(d_hasData, s_data, s_header)) } // grants insert a sink beat
+      is (s_sink) { state := Mux(d_hasData, s_data, s_header) }
+      is (s_data) { state := Mux(d_last, s_header, s_data) } // stay here until the final data beat
+    }
+  }
+
+  // Release the TL source (signalled once per message, when the header beat is accepted)
+  val relack = d.bits.opcode === TLMessages.ReleaseAck
+  io.a_tlSource.valid := io.q.fire() && state === s_header && !relack
+  io.a_tlSource.bits := d.bits.source
+  io.c_tlSource.valid := io.q.fire() && state === s_header && relack
+  io.c_tlSource.bits := d.bits.source
+
+  // Construct the header beat
+  val header = info.encode(
+    format = UInt(3), // format 3 marks a D-channel packet
+    opcode = d.bits.opcode,
+    param = d.bits.param,
+    size = d.bits.size,
+    domain = d.bits.source >> log2Ceil(info.params.sourcesPerDomain), // upper TL source bits select the domain
+    source = Mux(relack, io.c_clSource, io.a_clSource)) // ChipLink source comes from the matching lookup input
+
+  val isLastState = state === Mux(d_hasData, s_data, Mux(d_grant, s_sink, s_header))
+  d.ready := io.q.ready && isLastState // hold the input beat while the header/sink beats are sent
+  io.q.valid := d.valid
+  io.q.bits.last := d_last && isLastState
+  io.q.bits.data := Vec(header, d.bits.sink, d.bits.data)(state)
+  io.q.bits.beats := Mux(d_hasData, info.size2beats(d.bits.size), UInt(0)) + UInt(1) + d_grant.asUInt // data beats + header + optional sink beat
+}
diff --git a/src/main/scala/devices/chiplink/SinkE.scala b/src/main/scala/devices/chiplink/SinkE.scala
new file mode 100644
index 0000000..b466a9c
--- /dev/null
+++ b/src/main/scala/devices/chiplink/SinkE.scala
@@ -0,0 +1,33 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+
+class SinkE(info: ChipLinkInfo) extends Module // Turns each TileLink E message into a single-beat header packet on io.q
+{
+  val io = new Bundle {
+    val e = Decoupled(new TLBundleE(info.edgeIn.bundle)).flip // TileLink E channel in
+    val q = Decoupled(new DataLayer(info.params)) // serialized beats out
+    // Find the sink from D
+    val d_tlSink = Valid(UInt(width = info.params.sinkBits)) // TL sink being acknowledged
+    val d_clSink = UInt(INPUT, width = info.params.clSinkBits) // ChipLink sink supplied in return for d_tlSink
+  }
+
+  io.d_tlSink.valid := io.e.fire() // signalled once, when the E message is accepted
+  io.d_tlSink.bits := io.e.bits.sink
+
+  val header = info.encode(
+    format = UInt(4), // format 4 marks an E-channel packet
+    opcode = UInt(0),
+    param = UInt(0),
+    size = UInt(0),
+    domain = UInt(0),
+    source = io.d_clSink) // the sink id travels in the header's source field
+
+  io.e.ready := io.q.ready // pass-through handshake: no buffering or state
+  io.q.valid := io.e.valid
+  io.q.bits.last := Bool(true) // every packet is exactly one beat
+  io.q.bits.data := header
+  io.q.bits.beats := UInt(1)
+}
diff --git a/src/main/scala/devices/chiplink/SourceA.scala b/src/main/scala/devices/chiplink/SourceA.scala
new file mode 100644
index 0000000..16ae648
--- /dev/null
+++ b/src/main/scala/devices/chiplink/SourceA.scala
@@ -0,0 +1,104 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util._
+
+class SourceA(info: ChipLinkInfo) extends Module // Rebuilds TileLink A messages from the beat stream on io.q
+{
+  val io = new Bundle {
+    val a = Decoupled(new TLBundleA(info.edgeOut.bundle)) // TileLink A channel out
+    val q = Decoupled(UInt(width = info.params.dataBits)).flip // received beats in
+    // Used by D to find the txn
+    val d_tlSource = Valid(UInt(width = info.params.sourceBits)).flip // TL source to free
+    val d_clSource = UInt(OUTPUT, width = info.params.clSourceBits) // CAM data read back for d_tlSource
+  }
+
+  // CAM of sources used for each domain
+  val cams = Seq.fill(info.params.domains) {
+    Module(new CAM(info.params.sourcesPerDomain, info.params.clSourceBits))
+  }
+
+  // A simple FSM to generate the packet components
+  val state = RegInit(UInt(0, width = 2))
+  val s_header = UInt(0, width = 2)
+  val s_address0 = UInt(1, width = 2)
+  val s_address1 = UInt(2, width = 2)
+  val s_data = UInt(3, width = 2)
+
+  private def hold(key: UInt)(data: UInt) = { // pass data through while in state key; afterwards replay the value last seen in that state
+    val enable = state === key
+    Mux(enable, data, RegEnable(data, enable))
+  }
+
+  // Extract header fields
+  val Seq(_, q_opcode, q_param, q_size, q_domain, q_source) =
+    info.decode(io.q.bits).map(hold(s_header) _)
+
+  // Latch address
+  val q_address0 = hold(s_address0)(io.q.bits)
+  val q_address1 = hold(s_address1)(io.q.bits)
+
+  val (_, q_last) = info.firstlast(io.q, Some(UInt(0))) // NOTE(review): the second argument presumably pins the packet format (0 = A) - confirm in ChipLinkInfo
+  val q_hasData = !q_opcode(2) // this module treats A opcodes with bit 2 clear as carrying data
+  val a_first = RegEnable(state =/= s_data, io.q.fire()) // true unless the previously accepted beat was a data beat
+
+  when (io.q.fire()) {
+    switch (state) {
+      is (s_header) { state := s_address0 }
+      is (s_address0) { state := s_address1 }
+      is (s_address1) { state := Mux(q_hasData, s_data, s_header) }
+      is (s_data) { state := Mux(!q_last, s_data, s_header) }
+    }
+  }
+
+  // Determine if the request is legal. If not, route to error device.
+  val q_address = Cat(q_address1, q_address0)
+  val q_acq = q_opcode === TLMessages.AcquireBlock || q_opcode === TLMessages.AcquirePerm
+  val q_write = Mux(q_acq, q_param === TLPermissions.NtoT || q_param === TLPermissions.BtoT, q_hasData) // acquires count as writes when they request T permissions
+  val exists = info.edgeOut.manager.containsSafe(q_address)
+  private def writeable(m: TLManagerParameters): Boolean = if (m.supportsAcquireB) m.supportsAcquireT else m.supportsPutFull
+  private def acquireable(m: TLManagerParameters): Boolean = m.supportsAcquireB || m.supportsAcquireT
+  private def toBool(x: Boolean) = Bool(x)
+  val writeOk = info.edgeOut.manager.fastProperty(q_address, writeable, toBool)
+  val acquireOk = info.edgeOut.manager.fastProperty(q_address, acquireable, toBool)
+  val q_legal = exists && (!q_write || writeOk) && (!q_acq || acquireOk)
+
+  // Look for an available source in the correct domain
+  val source_ok = Vec(cams.map(_.io.alloc.ready))(q_domain)
+  val source = Vec(cams.map(_.io.key))(q_domain) holdUnless a_first // keep the key chosen on the first beat for the rest of the burst
+  val a_sel = UIntToOH(q_domain)
+
+  // Feed our preliminary A channel via the Partial Extractor FSM
+  val extract = Module(new ParitalExtractor(io.a.bits))
+  io.a <> extract.io.o
+  val a = extract.io.i
+  extract.io.last := q_last
+
+  a.bits.opcode := q_opcode
+  a.bits.param := q_param
+  a.bits.size := q_size
+  a.bits.source := Cat(q_domain, source) // TL source = domain in the upper bits, CAM key in the lower bits
+  a.bits.address := info.makeError(q_legal, q_address)
+  a.bits.mask := MaskGen(q_address0, q_size, info.params.dataBytes)
+  a.bits.data := io.q.bits
+
+  val stall = a_first && !source_ok // cannot start a message until the domain's CAM has a free entry
+  val xmit = q_last || state === s_data // only data beats and a packet's final beat produce an A beat
+  a.valid := (io.q.valid && !stall) && xmit
+  io.q.ready := (a.ready && !stall) || !xmit // other beats are absorbed without waiting on A
+  (cams zip a_sel.toBools) foreach { case (cam, sel) =>
+    cam.io.alloc.valid := sel && a_first && xmit && io.q.valid && a.ready // allocate in the selected domain on the first A beat
+    cam.io.alloc.bits := q_source
+  }
+
+  // Free the CAM entries
+  val d_clDomain = io.d_tlSource.bits >> log2Ceil(info.params.sourcesPerDomain) // upper TL source bits select the CAM
+  val d_sel = UIntToOH(d_clDomain)
+  io.d_clSource := Vec(cams.map(_.io.data))(d_clDomain)
+  (cams zip d_sel.toBools) foreach { case (cam, sel) =>
+    cam.io.free.bits := io.d_tlSource.bits
+    cam.io.free.valid := io.d_tlSource.valid && sel
+  }
+}
diff --git a/src/main/scala/devices/chiplink/SourceB.scala b/src/main/scala/devices/chiplink/SourceB.scala
new file mode 100644
index 0000000..863ae12
--- /dev/null
+++ b/src/main/scala/devices/chiplink/SourceB.scala
@@ -0,0 +1,68 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util._
+
+class SourceB(info: ChipLinkInfo) extends Module // Rebuilds TileLink B messages from the beat stream on io.q
+{
+  val io = new Bundle {
+    val b = Decoupled(new TLBundleB(info.edgeIn.bundle)) // TileLink B channel out
+    val q = Decoupled(UInt(width = info.params.dataBits)).flip // received beats in
+  }
+
+  // Find the optional cache (at most one)
+  val cache = info.edgeIn.client.clients.filter(_.supportsProbe).headOption
+
+  // A simple FSM to generate the packet components
+  val state = RegInit(UInt(0, width = 2))
+  val s_header = UInt(0, width = 2)
+  val s_address0 = UInt(1, width = 2)
+  val s_address1 = UInt(2, width = 2)
+  val s_data = UInt(3, width = 2)
+
+  private def hold(key: UInt)(data: UInt) = { // pass data through while in state key; afterwards replay the value last seen in that state
+    val enable = state === key
+    Mux(enable, data, RegEnable(data, enable))
+  }
+
+  // Extract header fields
+  val Seq(_, q_opcode, q_param, q_size, _, _) =
+    info.decode(io.q.bits).map(hold(s_header) _)
+
+  // Latch address
+  val q_address0 = hold(s_address0)(io.q.bits)
+  val q_address1 = hold(s_address1)(io.q.bits)
+
+  val (_, q_last) = info.firstlast(io.q, Some(UInt(1))) // NOTE(review): the second argument presumably pins the packet format (1 = B) - confirm in ChipLinkInfo
+  val q_hasData = !q_opcode(2) // this module treats B opcodes with bit 2 clear as carrying data
+  val b_first = RegEnable(state =/= s_data, io.q.fire()) // true unless the previously accepted beat was a data beat
+
+  when (io.q.fire()) {
+    switch (state) {
+      is (s_header) { state := s_address0 }
+      is (s_address0) { state := s_address1 }
+      is (s_address1) { state := Mux(q_hasData, s_data, s_header) }
+      is (s_data) { state := Mux(!q_last, s_data, s_header) }
+    }
+  }
+
+  // Feed our preliminary B channel via the Partial Extractor FSM
+  val extract = Module(new ParitalExtractor(io.b.bits))
+  io.b <> extract.io.o
+  val b = extract.io.i
+  extract.io.last := q_last
+
+  b.bits.opcode := q_opcode
+  b.bits.param := q_param
+  b.bits.size := q_size
+  b.bits.source := UInt(cache.map(_.sourceId.start).getOrElse(0)) // first source id of the probe-capable client, or 0 when there is none
+  b.bits.address := Cat(q_address1, q_address0)
+  b.bits.mask := MaskGen(q_address0, q_size, info.params.dataBytes)
+  b.bits.data := io.q.bits
+
+  val xmit = q_last || state === s_data // only data beats and a packet's final beat produce a B beat
+  b.valid := io.q.valid && xmit
+  io.q.ready := b.ready || !xmit // other beats are absorbed without waiting on B
+}
diff --git a/src/main/scala/devices/chiplink/SourceC.scala b/src/main/scala/devices/chiplink/SourceC.scala
new file mode 100644
index 0000000..dd96f32
--- /dev/null
+++ b/src/main/scala/devices/chiplink/SourceC.scala
@@ -0,0 +1,87 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util._
+
+class SourceC(info: ChipLinkInfo) extends Module // Rebuilds TileLink C messages from the beat stream on io.q
+{
+  val io = new Bundle {
+    val c = Decoupled(new TLBundleC(info.edgeOut.bundle)) // TileLink C channel out
+    val q = Decoupled(UInt(width = info.params.dataBits)).flip // received beats in
+    // Used by D to find the txn
+    val d_tlSource = Valid(UInt(width = info.params.sourceBits)).flip // TL source to free
+    val d_clSource = UInt(OUTPUT, width = info.params.clSourceBits) // CAM data read back for d_tlSource
+  }
+
+  // CAM of sources used for release
+  val cam = Module(new CAM(info.params.sourcesPerDomain, info.params.clSourceBits))
+
+  // A simple FSM to generate the packet components
+  val state = RegInit(UInt(0, width = 2))
+  val s_header = UInt(0, width = 2)
+  val s_address0 = UInt(1, width = 2)
+  val s_address1 = UInt(2, width = 2)
+  val s_data = UInt(3, width = 2)
+
+  private def hold(key: UInt)(data: UInt) = { // pass data through while in state key; afterwards replay the value last seen in that state
+    val enable = state === key
+    Mux(enable, data, RegEnable(data, enable))
+  }
+
+  // Extract header fields
+  val Seq(_, q_opcode, q_param, q_size, _, q_source) =
+    info.decode(io.q.bits).map(hold(s_header) _)
+
+  // Latch address
+  val q_address0 = hold(s_address0)(io.q.bits)
+  val q_address1 = hold(s_address1)(io.q.bits)
+
+  val (_, q_last) = info.firstlast(io.q, Some(UInt(2))) // NOTE(review): the second argument presumably pins the packet format (2 = C) - confirm in ChipLinkInfo
+  val q_hasData = q_opcode(0) // this module treats C opcodes with bit 0 set as carrying data
+  val c_first = RegEnable(state =/= s_data, io.q.fire()) // true unless the previously accepted beat was a data beat
+
+  when (io.q.fire()) {
+    switch (state) {
+      is (s_header) { state := s_address0 }
+      is (s_address0) { state := s_address1 }
+      is (s_address1) { state := Mux(q_hasData, s_data, s_header) }
+      is (s_data) { state := Mux(!q_last, s_data, s_header) }
+    }
+  }
+
+  // Determine if the request is legal. If not, route to error device.
+  val q_address = Cat(q_address1, q_address0)
+  val exists = info.edgeOut.manager.containsSafe(q_address)
+  private def writeable(m: TLManagerParameters): Boolean = if (m.supportsAcquireB) m.supportsAcquireT else m.supportsPutFull
+  private def acquireable(m: TLManagerParameters): Boolean = m.supportsAcquireB || m.supportsAcquireT
+  private def toBool(x: Boolean) = Bool(x)
+  val writeOk = info.edgeOut.manager.fastProperty(q_address, writeable, toBool)
+  val acquireOk = info.edgeOut.manager.fastProperty(q_address, acquireable, toBool)
+  val q_legal = exists && (!q_hasData || writeOk) && acquireOk // unlike A, every C message requires an acquireable target
+
+  // Look for an available source in the correct domain
+  val q_release = q_opcode === TLMessages.Release || q_opcode === TLMessages.ReleaseData
+  val source_ok = !q_release || cam.io.alloc.ready // only releases need a CAM entry
+  val source = cam.io.key holdUnless c_first // keep the key chosen on the first beat for the rest of the burst
+
+  io.c.bits.opcode := q_opcode
+  io.c.bits.param := q_param
+  io.c.bits.size := q_size
+  io.c.bits.source := Mux(q_release, source, UInt(0)) // always domain 0
+  io.c.bits.address := info.makeError(q_legal, q_address)
+  io.c.bits.data := io.q.bits
+  io.c.bits.error := Bool(false) // !!! need a packet footer
+
+  val stall = c_first && !source_ok // a release cannot start until the CAM has a free entry
+  val xmit = q_last || state === s_data // only data beats and a packet's final beat produce a C beat
+  io.c.valid := (io.q.valid && !stall) && xmit
+  io.q.ready := (io.c.ready && !stall) || !xmit // other beats are absorbed without waiting on C
+  cam.io.alloc.valid := q_release && c_first && xmit && io.q.valid && io.c.ready // allocate on the first C beat of a release
+  cam.io.alloc.bits := q_source
+
+  // Free the CAM entries
+  io.d_clSource := cam.io.data
+  cam.io.free := io.d_tlSource
+}
diff --git a/src/main/scala/devices/chiplink/SourceD.scala b/src/main/scala/devices/chiplink/SourceD.scala
new file mode 100644
index 0000000..bcf0545
--- /dev/null
+++ b/src/main/scala/devices/chiplink/SourceD.scala
@@ -0,0 +1,82 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util._
+
+class SourceD(info: ChipLinkInfo) extends Module // Rebuilds TileLink D messages from the beat stream on io.q
+{
+  val io = new Bundle {
+    val d = Decoupled(new TLBundleD(info.edgeIn.bundle)) // TileLink D channel out
+    val q = Decoupled(UInt(width = info.params.dataBits)).flip // received beats in
+    // Used by E to find the txn
+    val e_tlSink = Valid(UInt(width = info.params.sinkBits)).flip // TL sink to free
+    val e_clSink = UInt(OUTPUT, width = info.params.clSinkBits) // CAM data read back for e_tlSink
+  }
+
+  // We need a sink id CAM
+  val cam = Module(new CAM(info.params.sinks, info.params.clSinkBits))
+
+  // Map ChipLink transaction to TileLink source
+  val cl2tl = info.sourceMap.map(_.swap) // invert the TL -> ChipLink map
+  val nestedMap = cl2tl.groupBy(_._1.domain).mapValues(_.map { case (TXN(_, cls), tls) => (cls, tls) }) // domain -> (ChipLink source -> TL source)
+  val muxes = Seq.tabulate(info.params.domains) { i =>
+    info.mux(nestedMap.lift(i).getOrElse(Map(0 -> 0))) // domains with no mapped sources fall back to a 0 -> 0 map
+  }
+
+  // The FSM states
+  val state = RegInit(UInt(0, width = 2))
+  val s_header = UInt(0, width = 2)
+  val s_sink = UInt(1, width = 2)
+  val s_data = UInt(2, width = 2)
+
+  private def hold(key: UInt)(data: UInt) = { // pass data through while in state key; afterwards replay the value last seen in that state
+    val enable = state === key
+    Mux(enable, data, RegEnable(data, enable))
+  }
+
+  // Extract header fields from the message
+  val Seq(_, q_opcode, q_param, q_size, q_domain, q_source) =
+    info.decode(io.q.bits).map(hold(s_header) _)
+
+  // Extract sink from the optional second beat
+  val q_sink = hold(s_sink)(io.q.bits(15, 0))
+
+  val q_grant = q_opcode === TLMessages.Grant || q_opcode === TLMessages.GrantData
+  val (_, q_last) = info.firstlast(io.q, Some(UInt(3))) // NOTE(review): the second argument presumably pins the packet format (3 = D) - confirm in ChipLinkInfo
+  val d_first = RegEnable(state =/= s_data, io.q.fire()) // true unless the previously accepted beat was a data beat
+  val s_maybe_data = Mux(q_last, s_header, s_data) // return to header when the current beat ends the packet
+
+  when (io.q.fire()) {
+    switch (state) {
+      is (s_header) { state := Mux(q_grant, s_sink, s_maybe_data) } // grants carry an extra sink beat
+      is (s_sink) { state := s_maybe_data }
+      is (s_data) { state := s_maybe_data }
+    }
+  }
+
+  // Look for an available sink
+  val sink_ok = !q_grant || cam.io.alloc.ready // only grants need a CAM entry
+  val sink = cam.io.key holdUnless d_first // keep the key chosen on the first beat for the rest of the burst
+  val stall = d_first && !sink_ok
+  val xmit = q_last || state === s_data // only data beats and a packet's final beat produce a D beat
+
+  io.d.bits.opcode := q_opcode
+  io.d.bits.param := q_param
+  io.d.bits.size := q_size
+  io.d.bits.source := Vec(muxes.map { m => m(q_source) })(q_domain) // translate (domain, ChipLink source) to the TL source
+  io.d.bits.sink := Mux(q_grant, sink, UInt(0))
+  io.d.bits.data := io.q.bits
+  io.d.bits.error := Bool(false) // !!! frack => need packet footer?
+
+  io.d.valid := (io.q.valid && !stall) && xmit
+  io.q.ready := (io.d.ready && !stall) || !xmit // other beats are absorbed without waiting on D
+
+  cam.io.alloc.valid := q_grant && d_first && xmit && io.q.valid && io.d.ready // allocate on the first D beat of a grant
+  cam.io.alloc.bits := q_sink
+
+  // Free the CAM
+  io.e_clSink := cam.io.data
+  cam.io.free := io.e_tlSink
+}
diff --git a/src/main/scala/devices/chiplink/SourceE.scala b/src/main/scala/devices/chiplink/SourceE.scala
new file mode 100644
index 0000000..eaeca46
--- /dev/null
+++ b/src/main/scala/devices/chiplink/SourceE.scala
@@ -0,0 +1,21 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util._
+
+class SourceE(info: ChipLinkInfo) extends Module // Rebuilds TileLink E messages from single-beat header packets on io.q
+{
+  val io = new Bundle {
+    val e = Decoupled(new TLBundleE(info.edgeOut.bundle)) // TileLink E channel out
+    val q = Decoupled(UInt(width = info.params.dataBits)).flip // received beats in
+  }
+
+  // Extract header fields (only the last decoded field is used, as the sink id)
+  val Seq(_, _, _, _, _, q_sink) = info.decode(io.q.bits)
+
+  io.q.ready := io.e.ready // pass-through handshake: no buffering or state
+  io.e.valid := io.q.valid
+  io.e.bits.sink := q_sink
+}
diff --git a/src/main/scala/devices/chiplink/TX.scala b/src/main/scala/devices/chiplink/TX.scala
new file mode 100644
index 0000000..4983df2
--- /dev/null
+++ b/src/main/scala/devices/chiplink/TX.scala
@@ -0,0 +1,99 @@
+// See LICENSE for license details.
+package sifive.blocks.devices.chiplink
+
+import Chisel._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util._
+
+class TX(info: ChipLinkInfo) extends Module // Arbitrates the A-E packet streams plus credit updates (F) onto the c2b output
+{
+  val io = new Bundle {
+    val c2b_send = Bool(OUTPUT)
+    val c2b_data = UInt(OUTPUT, info.params.dataBits)
+    val a = new AsyncBundle(info.params.crossingDepth, new DataLayer(info.params)).flip // asynchronous inputs, used when syncTX is false
+    val b = new AsyncBundle(info.params.crossingDepth, new DataLayer(info.params)).flip
+    val c = new AsyncBundle(info.params.crossingDepth, new DataLayer(info.params)).flip
+    val d = new AsyncBundle(info.params.crossingDepth, new DataLayer(info.params)).flip
+    val e = new AsyncBundle(info.params.crossingDepth, new DataLayer(info.params)).flip
+    val sa = DecoupledIO(new DataLayer(info.params)).flip // synchronous inputs, used when syncTX is true
+    val sb = DecoupledIO(new DataLayer(info.params)).flip
+    val sc = DecoupledIO(new DataLayer(info.params)).flip
+    val sd = DecoupledIO(new DataLayer(info.params)).flip
+    val se = DecoupledIO(new DataLayer(info.params)).flip
+    val rxc = new AsyncBundle(1, new CreditBump(info.params)).flip // credit increments added to rx
+    val txc = new AsyncBundle(1, new CreditBump(info.params)).flip // credit increments added to tx
+  }
+
+  // Currently available credits
+  val rx = RegInit(CreditBump(info.params, 0)) // credits waiting to be sent out in an F packet
+  val tx = RegInit(CreditBump(info.params, 0)) // credits we may spend on outgoing A-E packets
+  val first = RegInit(Bool(true)) // high when a new arbitration decision is taken (between packets)
+
+  // Constantly pull credits from RX
+  val rxInc = FromAsyncBundle(io.rxc)
+  val txInc = FromAsyncBundle(io.txc)
+  rxInc.ready := Bool(true)
+  txInc.ready := Bool(true)
+
+  // Cross the requests (if necessary)
+  val sync = info.params.syncTX
+  val a = if (sync) ShiftQueue(io.sa, 2) else FromAsyncBundle(io.a)
+  val b = if (sync) ShiftQueue(io.sb, 2) else FromAsyncBundle(io.b)
+  val c = if (sync) ShiftQueue(io.sc, 2) else FromAsyncBundle(io.c)
+  val d = if (sync) ShiftQueue(io.sd, 2) else FromAsyncBundle(io.d)
+  val e = if (sync) ShiftQueue(io.se, 2) else FromAsyncBundle(io.e)
+
+  private def ioX = Seq(a, b, c, d, e)
+  val validABCDE = Cat(ioX.map(_.valid).reverse) // bit 0 = A ... bit 4 = E
+
+  // Calculate if the packet will fit
+  val txDec = CreditBump(info.params, 0)
+  val spaceABCDE = Cat(((tx.X zip txDec.X) zip ioX) .map { case ((credit, reduce), beat) =>
+    val delta = credit -& beat.bits.beats // expanding subtract keeps the borrow bit
+    reduce := Mux(beat.fire() && first, delta, credit) // charge the whole packet when its first beat is accepted
+    delta.asSInt >= SInt(0) // the packet fits if the credits do not go negative
+  }.reverse)
+  val requestABCDE = validABCDE & spaceABCDE
+
+  // Force a credit update roughly every sqrt(Qdepth) cycles: the countdown below is log2Ceil(Qdepth)/2 bits wide
+  val xmitBits = log2Ceil(info.params.Qdepth) / 2
+  val xmit = RegInit(UInt(0, width = xmitBits))
+  val forceXmit = xmit === UInt(0)
+
+  // Frame an update of the RX credits
+  val (header, rxLeft) = rx.toHeader
+  val f = Wire(Decoupled(new DataLayer(info.params)))
+  f.valid := requestABCDE === UInt(0) || forceXmit // send credits when nothing else can go, or when the countdown expires
+  f.bits.data := header
+  f.bits.last := Bool(true)
+  f.bits.beats := UInt(1)
+
+  when (!forceXmit) { xmit := xmit - UInt(1) }
+  when (f.fire()) { xmit := ~UInt(0, width = xmitBits) } // restart the countdown after each credit update
+
+  // Include the F credit channel in arbitration
+  val ioF = ioX :+ f
+  val space = Cat(UInt(1), spaceABCDE) // F is never blocked by credits
+  val request = Cat(f.valid, requestABCDE)
+  val valid = Cat(f.valid, validABCDE)
+
+  // Select a channel to transmit from those with data and space
+  val lasts = Cat(ioF.map(_.bits.last).reverse)
+  val readys = TLArbiter.roundRobin(6, request, first)
+  val winner = readys & request
+  val state = RegInit(UInt(0, width=6))
+  val grant = Mux(first, winner, state) // mid-packet, stay with the channel chosen at its first beat
+  val allowed = Mux(first, readys & space, state)
+  (ioF zip allowed.toBools) foreach { case (beat, sel) => beat.ready := sel }
+
+  state := grant
+  first := (grant & lasts).orR // re-arbitrate after the granted channel presents its last beat
+
+  // Form the output beat (send and data are both delayed by two register stages)
+  io.c2b_send := RegNext(RegNext(first || (state & valid) =/= UInt(0), Bool(false)), Bool(false))
+  io.c2b_data := RegNext(Mux1H(RegNext(grant), RegNext(Vec(ioF.map(_.bits.data)))))
+
+  // Update the credit trackers
+  rx := Mux(f.fire(), rxLeft, rx) + Mux(rxInc.fire(), rxInc.bits, CreditBump(info.params, 0)) // an F packet leaves only what did not fit in its header
+  tx := txDec + Mux(txInc.fire(), txInc.bits, CreditBump(info.params, 0))
+}