From 33a05786db25921835a540cdda8d943585f06cc4 Mon Sep 17 00:00:00 2001 From: "Wesley W. Terpstra" Date: Tue, 13 Sep 2016 15:44:36 -0700 Subject: [PATCH] tilelink2 RAMModel: fix put, get, putAck, getAck case (#282) This case should result in undefined data for the Get. It was previously requiring the Get to return the new Put data, which is only guaranteed by a FIFO device. --- .../scala/uncore/tilelink2/Parameters.scala | 4 ++ .../scala/uncore/tilelink2/RAMModel.scala | 71 +++++++++++++++---- 2 files changed, 62 insertions(+), 13 deletions(-) diff --git a/src/main/scala/uncore/tilelink2/Parameters.scala b/src/main/scala/uncore/tilelink2/Parameters.scala index fb36c41d..a6891af5 100644 --- a/src/main/scala/uncore/tilelink2/Parameters.scala +++ b/src/main/scala/uncore/tilelink2/Parameters.scala @@ -179,6 +179,10 @@ case class TLManagerPortParameters(managers: Seq[TLManagerParameters], beatBytes def findById(id: UInt) = Vec(managers.map(_.sinkId.contains(id))) def findId(address: UInt) = Mux1H(find(address), managers.map(m => UInt(m.sinkId.start))) + // Note: returns the actual fifoId + 1 or 0 if None + def findFifoId(address: UInt) = Mux1H(find(address), managers.map(m => UInt(m.fifoId.map(_+1).getOrElse(0)))) + def hasFifoId(address: UInt) = Mux1H(find(address), managers.map(m => Bool(m.fifoId.isDefined))) + // !!! need a cheaper version of find, where we assume a valid address match exists // Does this Port manage this ID/address? diff --git a/src/main/scala/uncore/tilelink2/RAMModel.scala b/src/main/scala/uncore/tilelink2/RAMModel.scala index ce3ddb70..d6609174 100644 --- a/src/main/scala/uncore/tilelink2/RAMModel.scala +++ b/src/main/scala/uncore/tilelink2/RAMModel.scala @@ -5,6 +5,21 @@ package uncore.tilelink2 import Chisel._ import chisel3.util.LFSR16 +// We detect concurrent puts that put memory into an undefined state. +// put0, put0Ack, put1, put1Ack => ok: defined +// put0, put1, put0Ack, put1Ack => ok: put1 clears valid (it sees busy>0) defined for FIFO +// put0, put1, put1Ack, put0Ack => ok: put1 clears valid (it sees busy>0) defined for FIFO +// When the region is FIFO, all writes leave 'valid' set (concurrent puts have defined behaviour) + +// We detect concurrent puts that invalidate an inflight get. +// get, getAck, put, putAck => ok: defined +// get, put, getAck, putAck => ok: detected by getAck (it sees busy>0) +// get, put, putAck, getAck => ok: putAck uses CAM to wipe get validity impossible for FIFO +// put, putAck, get, getAck => ok: defined +// put, get, putAck, getAck => ok: putAck uses CAM to wipe get validity defined for FIFO +// put, get, getAck, putAck => ok: detected by getAck (it sees busy>0) impossible for FIFO +// If FIFO, the getAck should check data even if its validity was wiped + class TLRAMModel extends LazyModule { val node = TLIdentityNode() @@ -78,8 +93,9 @@ class TLRAMModel extends LazyModule val inc_trees_wen = Wire(init = Fill(decTrees, wipe)) val dec_trees_wen = Wire(init = Fill(decTrees, wipe)) - // This requires either distributed memory or registers (no register on either input or output) - val flight = Mem(endSourceId, new FlightMonitor) + // This must be registers b/c we build a CAM from it + val flight = Reg(Vec(endSourceId, new FlightMonitor)) + val valid = Reg(Vec(endSourceId, Bool())) // We want to cross flight data from A to D in the same cycle (for combinational TL2 devices) val a_flight = Wire(new FlightMonitor) @@ -87,9 +103,9 @@ class TLRAMModel extends LazyModule a_flight.size := edge.size(in.a.bits) a_flight.opcode := in.a.bits.opcode - flight.write(in.a.bits.source, a_flight) + flight(in.a.bits.source) := a_flight val bypass = in.a.valid && in.a.bits.source === out.d.bits.source - val d_flight = RegNext(Mux(bypass, a_flight, flight.read(out.d.bits.source))) + val d_flight = RegNext(Mux(bypass, a_flight, flight(out.d.bits.source))) // Process A access requests val a = Reg(next = in.a.bits) @@ -103,6 +119,7 @@ class TLRAMModel extends LazyModule val a_addr_hi = a.addr_hi | (a_beats1 & ~a_counter1) val a_base = edge.address(a) val a_mask = edge.mask(a_base, a_size) + val a_fifo = edge.manager.hasFifoId(a_base) // Grab the concurrency state we need val a_inc_bytes = inc_bytes.map(_.read(a_addr_hi)) @@ -121,8 +138,11 @@ class TLRAMModel extends LazyModule // !!! atomics assert (a.opcode =/= TLMessages.Acquire) + // Mark the operation as valid + valid(a.source) := Bool(true) + // Increase the per-byte flight counter for the whole transaction - when (a_first && a.opcode =/= TLMessages.Hint) { + when (a_first && a.opcode =/= TLMessages.Hint && a.opcode =/= TLMessages.Get) { when (a_size <= UInt(shift)) { inc_bytes_wen := a_mask } @@ -139,13 +159,17 @@ class TLRAMModel extends LazyModule } } } + + when (a.opcode === TLMessages.Get) { + printf("G 0x%x - 0%x\n", a_base, a_base | UIntToOH1(a_size, addressBits)) + } } val a_waddr = Mux(wipe, wipeIndex, a_addr_hi) for (i <- 0 until beatBytes) { val data = Wire(new ByteMonitor) - val busy = a_inc(i) - a_dec(i) - (!a_first).asUInt - data.valid := Mux(wipe, Bool(false), busy === UInt(0)) + val busy = a_inc(i) =/= a_dec(i) + (!a_first).asUInt + data.valid := Mux(wipe, Bool(false), !busy || a_fifo) data.value := a.data(8*(i+1)-1, 8*i) when (shadow_wen(i)) { shadow(i).write(a_waddr, data) @@ -169,7 +193,6 @@ class TLRAMModel extends LazyModule // Process D access responses val d = RegNext(out.d.bits) val d_fire = Reg(next = out.d.fire(), init = Bool(false)) - val d_bypass = a_fire && d.source === a.source val d_beats1 = edge.numBeats1(d) val d_size = edge.size(d) val d_sizeOH = UIntToOH(d_size) @@ -180,6 +203,7 @@ class TLRAMModel extends LazyModule val d_base = d_flight.base val d_addr_hi = d_base >> shift | (d_beats1 & ~d_counter1) val d_mask = edge.mask(d_base, d_size) + val d_fifo = edge.manager.hasFifoId(d_flight.base) // Grab the concurrency state we need val d_inc_bytes = inc_bytes.map(_.read(d_addr_hi)) @@ -191,25 +215,41 @@ class TLRAMModel extends LazyModule val d_inc = d_inc_bytes.map(_ + d_inc_tree) val d_dec = d_dec_bytes.map(_ + d_dec_tree) val d_shadow = shadow.map(_.read(d_addr_hi)) + val d_valid = valid(d.source) when (d_fire) { - assert (d_size === d_flight.size) d_counter := Mux(d_first, d_beats1, d_counter1) + // Check the response is correct + assert (d_size === d_flight.size) + assert (edge.manager.findId(d_flight.base) === d.sink) + // addr_lo is allowed to differ + when (d_flight.opcode === TLMessages.Hint) { assert (d.opcode === TLMessages.HintAck) } - // Decreaes the per-byte flight counter for the whole transaction - when (d_last && d_flight.opcode =/= TLMessages.Hint) { + // Decrease the per-byte flight counter for the whole transaction + when (d_last && d_flight.opcode =/= TLMessages.Hint && d_flight.opcode =/= TLMessages.Get) { when (d_size <= UInt(shift)) { dec_bytes_wen := d_mask } dec_trees_wen := d_sizeOH >> (shift+1) + // NOTE: D channel carries uninterrupted multibeast op, so updating on last is fine + for (i <- 0 until endSourceId) { + // Does this modification overlap a Get? => wipe it's valid + val f_base = flight(i).base + val f_size = flight(i).size + val f_bits = UIntToOH1(f_size, addressBits) + val d_bits = UIntToOH1(d_size, addressBits) + val overlap = ~(~(f_base ^ d_base) | (f_bits | d_bits)) === UInt(0) + when (overlap) { valid(i) := Bool(false) } + } } when (d_flight.opcode === TLMessages.PutFullData || d_flight.opcode === TLMessages.PutPartialData) { assert (d.opcode === TLMessages.AccessAck) + printf("p 0x%x - 0x%x\n", d_base, d_base | UIntToOH1(d_size, addressBits)) } // !!! atomics @@ -220,10 +260,15 @@ class TLRAMModel extends LazyModule val got = d.data(8*(i+1)-1, 8*i) val shadow = Wire(init = d_shadow(i)) when (d_mask(i)) { + val d_addr = d_addr_hi << shift | UInt(i) when (!shadow.valid) { - printf("G 0x%x := undefined\n", d_addr_hi << shift | UInt(i)) + printf("g 0x%x := undefined (uninitialized or prior overlapping puts)\n", d_addr) + } .elsewhen (d_inc(i) =/= d_dec(i)) { + printf("g 0x%x := undefined (concurrent incomplete puts #%d)\n", d_addr, d_inc(i) - d_dec(i)) + } .elsewhen (!d_fifo && !d_valid) { + printf("g 0x%x := undefined (concurrent completed put)\n", d_addr) } .otherwise { - printf("G 0x%x := 0x%x\n", d_addr_hi << shift | UInt(i), got) + printf("g 0x%x := 0x%x\n", d_addr, got) assert (shadow.value === got) } }