// Provenance: commit a52d418439f56f791e05626508fdb933961eb189
//   Author:  Wesley W. Terpstra
//   Date:    Wed, 20 Jul 2016 13:58:49 -0700
//   Subject: fragmenter: support multi-beat get/put via fragmenting to single-beat operations
// These definitions are appended to uncore/src/main/scala/converters/Tilelink.scala
// (after TileLinkIONarrower, per the patch hunk context).

/** Acquire-side half of the fragmenter.
  *
  * Holds one Acquire in a register stage and replays it on `out` as single-beat
  * operations:
  *  - a getBlock is retyped to get and re-sent with an incrementing `addr_beat`
  *    until `acq_last_beat` is reached;
  *  - every beat of a putBlock is retyped to put and forwarded as-is;
  *  - any other Acquire is forwarded unchanged.
  *
  * For the first beat of every transaction a value is pushed on `que`: all-ones for
  * getBlock/putBlock, zero otherwise. TileLinkFragmenterSink uses this value as a
  * "grants still to come, minus one" counter.
  * NOTE(review): all-ones is presumably tlDataBeats-1 -- confirm against TLModule.
  */
class TileLinkFragmenterSource(implicit p: Parameters) extends TLModule()(p) {
  val io = new Bundle {
    val in = Decoupled(new Acquire).flip
    val out = Decoupled(new Acquire)
    val que = Decoupled(UInt(width = tlBeatAddrBits))
  }

  // Pipeline stage with acquire data; needed to ensure in.bits stay fixed when !in.ready
  val acq_valid = RegInit(Bool(false))
  val acq_bits = Reg(new Acquire)
  // The last beat of the generated acquire sequence to send
  val acq_last_beat = Reg(UInt(width = tlBeatAddrBits))
  val acq_last = acq_bits.addr_beat === acq_last_beat

  // 'in' has the first beat?
  val in_multi_put = io.in.bits.isBuiltInType(Acquire.putBlockType)
  val in_multi_get = io.in.bits.isBuiltInType(Acquire.getBlockType)
  val in_first_beat = !in_multi_put || io.in.bits.addr_beat === UInt(0)

  // Move stuff from acq to out whenever out is ready
  io.out.valid := acq_valid
  // When can acq accept a request?
  val acq_ready = !acq_valid || (acq_last && io.out.ready)
  // Move the first beat from in to acq only when both acq and que are ready
  io.in.ready := (!in_first_beat || io.que.ready) && acq_ready
  io.que.valid := (in_first_beat && io.in.valid) && acq_ready

  // in.fire moves data from in to acq and (optionally) que
  // out.fire moves data from acq to out

  // Desired flow control results:
  assert (!io.que.fire() || io.in.fire()) // 1. que.fire => in.fire
  assert (!(io.in.fire() && in_first_beat) || io.que.fire()) // 2. in.fire && in_first_beat => que.fire
  assert (!io.out.fire() || acq_valid) // 3. out.fire => acq_valid
  assert (!io.in.fire() || (!acq_valid || (io.out.fire() && acq_last))) // 4. in.fire => !acq_valid || (out.fire && acq_last)
  // Proofs:
  // 1. que.fire => que.ready && in.valid && acq_ready => in.ready && in.valid
  // 2. in.fire && in_first_beat => in.valid && acq_ready && [(!in_first_beat || que.ready) && in_first_beat] =>
  //      in.valid && acq_ready && que.ready && in_first_beat => que.valid && que.ready
  // 3. out.fire => out.valid => acq_valid
  // 4. in.fire => acq_ready => !acq_valid || (acq_last && out.ready) =>
  //      !acq_valid || (acq_valid && acq_last && out.ready) => !acq_valid || (acq_last && out.fire)

  val multi_size = SInt(-1, width = tlBeatAddrBits).asUInt // TL2: use in.bits.size()/beatBits-1
  // Value handed to the sink: all-ones for a block get/put, zero for everything else
  val in_sizeMinus1 = Mux(in_multi_get || in_multi_put, multi_size, UInt(0))
  // Extra beats this stage must generate itself: only a getBlock is replayed locally;
  // putBlock beats each arrive on 'in' with their own data
  val in_insertSizeMinus1 = Mux(in_multi_get, multi_size, UInt(0))

  when (io.in.fire()) {
    // Theorem 4 makes this safe; we overwrite garbage, or replace the final acq
    acq_valid := Bool(true)
    acq_bits := io.in.bits
    acq_last_beat := io.in.bits.addr_beat + in_insertSizeMinus1
    // Replace this with size truncation in TL2:
    // (this later connection to a_type overrides the bulk acq_bits connection above)
    acq_bits.a_type := Mux(in_multi_put, Acquire.putType, Mux(in_multi_get, Acquire.getType, io.in.bits.a_type))
  } .elsewhen (io.out.fire()) {
    acq_valid := !acq_last // false => !in.valid || (!que.ready && in_first_beat)
    acq_bits.addr_beat := acq_bits.addr_beat + UInt(1)
    // acq_last && out.fire => acq_last && out.ready && acq_valid => acq_ready
    // Suppose in.valid, then !in.fire => !in.ready => !(!in_first_beat || que.ready) => !que.ready && in_first_beat
  }

  // Safe by theorem 3
  io.out.bits := acq_bits
  // Safe by theorem 1
  io.que.bits := in_sizeMinus1
}

/** Grant-side half of the fragmenter.
  *
  * Pops a count from `que` (one per transaction) and decrements it on every accepted
  * grant. A grant is forwarded on `out` when it is a get data beat or when the count
  * has reached zero; other grants (the non-final put acknowledgements of a fragmented
  * put) are accepted on `in` and dropped. When the popped count was non-zero, the
  * forwarded `g_type` is rewritten to getDataBlockType (get data beats) or putAckType
  * (anything else).
  */
class TileLinkFragmenterSink(implicit p: Parameters) extends TLModule()(p) {
  val io = new Bundle {
    val in = Decoupled(new Grant).flip
    val out = Decoupled(new Grant)
    val que = Decoupled(UInt(width = tlBeatAddrBits)).flip
  }

  // count_bits/multi_op are only meaningful while count_valid is set
  val count_valid = RegInit(Bool(false))
  // Was the count taken from que non-zero, i.e. is this a fragmented transaction?
  val multi_op = Reg(Bool())
  val count_bits = Reg(UInt(width = tlBeatAddrBits))
  val last = count_bits === UInt(0)

  val in_get = io.in.bits.isBuiltInType(Grant.getDataBeatType)
  // Every get data beat is passed on; anything else only on the final grant
  val deliver = last || in_get

  // Accept the input, discarding the non-final put grant
  io.in.ready := count_valid && (io.out.ready || !deliver)
  // Output the grant whenever we want delivery
  io.out.valid := count_valid && io.in.valid && deliver
  // Take a new number whenever we deliver the last beat
  io.que.ready := !count_valid || (io.in.valid && io.out.ready && last)

  // Desired flow control results:
  assert (!io.out.fire() || (count_valid && io.in.fire())) // 1. out.fire => in.fire && count_valid
  assert (!(io.in.fire() && deliver) || io.out.fire()) // 2. in.fire && deliver => out.fire
  assert (!(io.out.fire() && last) || io.que.ready) // 3. out.fire && last => que.ready
  assert (!io.que.fire() || (!count_valid || (io.out.fire() && last))) // 4. que.fire => !count_valid || (out.fire && last)
  // Proofs:
  // 1. out.fire => out.ready && (count_valid && in.valid && deliver) => (count_valid && out.ready) && in.valid => in.fire
  // 2. in.fire && deliver => in.valid && count_valid && [(out.ready || !deliver) && deliver] =>
  //      in.valid && count_valid && deliver && out.ready => out.fire
  // 3. out.fire && last => out.valid && out.ready && last => in.valid && out.ready && last => que.ready
  // 4. que.fire => que.valid && (!count_valid || (in.valid && out.ready && last))
  //      => !count_valid || (count_valid && in.valid && out.ready && [last => deliver])
  //      => !count_valid || (out.valid && out.ready && last)

  when (io.que.fire()) {
    // Theorem 4 makes this safe; we overwrite garbage or last output
    count_valid := Bool(true)
    count_bits := io.que.bits
    multi_op := io.que.bits =/= UInt(0)
  } .elsewhen (io.in.fire()) {
    count_valid := !last // false => !que.valid
    count_bits := count_bits - UInt(1)
    // Proof: in.fire && [last => deliver] =2=> out.fire && last =3=> que.ready
    // !que.fire && que.ready => !que.valid
  }

  // Safe by Theorem 1
  io.out.bits := io.in.bits
  // Later connection overrides g_type from the bulk connection above
  io.out.bits.g_type := Mux(multi_op, Mux(in_get, Grant.getDataBlockType, Grant.putAckType), io.in.bits.g_type)
}

/** Connects a TileLinkFragmenterSource on the acquire channel to a
  * TileLinkFragmenterSink on the grant channel, with a Queue carrying the
  * per-transaction counts from source to sink.
  *
  * @param depth passed to `Queue` as its second argument for the source-to-sink queue
  */
class TileLinkFragmenter(depth: Int = 1)(implicit p: Parameters) extends TLModule()(p) {
  val io = new Bundle {
    val in = new ClientUncachedTileLinkIO().flip
    val out = new ClientUncachedTileLinkIO
  }

  // TL2:
  //   supportsAcquire = false
  //   modify all outward managers to supportsMultibeat = true
  //   assert: all managers must behaveFIFO (not inspect duplicated id field)

  val source = Module(new TileLinkFragmenterSource)
  val sink = Module(new TileLinkFragmenterSink)
  sink.io.que <> Queue(source.io.que, depth)

  source.io.in <> io.in.acquire
  io.out.acquire <> source.io.out
  sink.io.in <> io.out.grant
  io.in.grant <> sink.io.out
}

/** Convenience constructor: instantiates a TileLinkFragmenter behind `source`. */
object TileLinkFragmenter {
  /** Pass the source/client to fragment.
    *
    * @param source client-side link to be fragmented; bulk-connected to the fragmenter's `io.in`
    * @param depth  forwarded to the TileLinkFragmenter constructor
    * @return the fragmenter's `io.out`
    */
  def apply(source: ClientUncachedTileLinkIO, depth: Int = 1)(implicit p: Parameters): ClientUncachedTileLinkIO = {
    val fragmenter = Module(new TileLinkFragmenter(depth))
    fragmenter.io.in <> source
    fragmenter.io.out
  }
}