diff --git a/src/main/scala/uncore/tilelink2/RAMModel.scala b/src/main/scala/uncore/tilelink2/RAMModel.scala index b1131588..e85bd5d4 100644 --- a/src/main/scala/uncore/tilelink2/RAMModel.scala +++ b/src/main/scala/uncore/tilelink2/RAMModel.scala @@ -15,10 +15,11 @@ class TLRAMModel extends LazyModule val out = node.bundleOut } - // Pass through all signals unchanged - io.out <> io.in require (io.out.size == 1) // !!! support multiple clients + val in = io.in(0) + val out = io.out(0) + val edge = node.edgesIn(0) val endAddress = edge.manager.maxAddress + 1 val endSourceId = edge.client.endSourceId @@ -32,6 +33,27 @@ class TLRAMModel extends LazyModule val countBits = log2Up(endSourceId) val sizeBits = edge.bundle.sizeBits + // Reset control logic + val wipeIndex = RegInit(UInt(0, width = log2Ceil(endAddressHi) + 1)) + val wipe = !wipeIndex(log2Ceil(endAddressHi)) + wipeIndex := wipeIndex + wipe.asUInt + + // Block traffic while wiping Mems + in.a.ready := out.a.ready && !wipe + out.a.valid := in.a.valid && !wipe + out.a.bits := in.a.bits + out.d.ready := in.d.ready && !wipe + in.d.valid := out.d.valid && !wipe + in.d.bits := out.d.bits + + // BCE unsupported + in.b.valid := Bool(false) + out.c.valid := Bool(false) + out.e.valid := Bool(false) + out.b.ready := Bool(true) + in.c.ready := Bool(true) + in.e.ready := Bool(true) + class ByteMonitor extends Bundle { val valid = Bool() val value = UInt(width = 8) @@ -42,33 +64,39 @@ class TLRAMModel extends LazyModule val opcode = UInt(width = 3) } - // !!! Must somehow power-on with these all initialized with 0 val shadow = Seq.fill(beatBytes) { Mem(endAddressHi, new ByteMonitor) } val inc_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) } val dec_bytes = Seq.fill(beatBytes) { Mem(endAddressHi, UInt(width = countBits)) } val inc_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) } val dec_trees = Seq.tabulate(decTrees) { i => Mem(endAddressHi >> (i+1), UInt(width = countBits)) } - - // Don't care on power-up + + val shadow_wen = Wire(init = Fill(beatBytes, wipe)) + val inc_bytes_wen = Wire(init = Fill(beatBytes, wipe)) + val dec_bytes_wen = Wire(init = Fill(beatBytes, wipe)) + val inc_trees_wen = Wire(init = Fill(decTrees, wipe)) + val dec_trees_wen = Wire(init = Fill(decTrees, wipe)) + + // Don't care on power-up !!! Mem ? val flight = Reg(Vec(endSourceId, new FlightMonitor)) // Process A access requests - val a = RegNext(io.in(0).a) - val a_beats1 = edge.numBeats1(a.bits) - val a_size = edge.size(a.bits) + val a = Reg(next = in.a.bits) + val a_fire = Reg(next = in.a.fire(), init = Bool(false)) + val a_beats1 = edge.numBeats1(a) + val a_size = edge.size(a) val a_sizeOH = UIntToOH(a_size) val a_counter = RegInit(UInt(0, width = maxLgBeats)) val a_counter1 = a_counter - UInt(1) val a_first = a_counter === UInt(0) - val a_addr_hi = a.bits.addr_hi | (a_beats1 & ~a_counter1) - val a_base = edge.address(a.bits) + val a_addr_hi = a.addr_hi | (a_beats1 & ~a_counter1) + val a_base = edge.address(a) val a_mask = edge.mask(a_base, a_size) // What is the request? val a_flight = Wire(new FlightMonitor) a_flight.base := a_base a_flight.size := a_size - a_flight.opcode := a.bits.opcode + a_flight.opcode := a.opcode // Grab the concurrency state we need val a_inc_bytes = inc_bytes.map(_.read(a_addr_hi)) @@ -80,56 +108,72 @@ class TLRAMModel extends LazyModule val a_inc = a_inc_bytes.map(_ + a_inc_tree) val a_dec = a_dec_bytes.map(_ + a_dec_tree) - when (a.fire()) { + when (a_fire) { // Record the request so we can handle it's response - flight(a.bits.source) := a_flight + flight(a.source) := a_flight a_counter := Mux(a_first, a_beats1, a_counter1) // !!! atomics - assert (a.bits.opcode =/= TLMessages.Acquire) + assert (a.opcode =/= TLMessages.Acquire) // Increase the per-byte flight counter for the whole transaction - when (a_first && a.bits.opcode =/= TLMessages.Hint) { + when (a_first && a.opcode =/= TLMessages.Hint) { when (a_size <= UInt(shift)) { - for (i <- 0 until beatBytes) { - when (a_mask(i)) { // not a.bits.mask; the full mask - inc_bytes(i).write(a_addr_hi, a_inc_bytes(i) + UInt(1)) - } - } - } - for (i <- 0 until inc_trees.size) { - when (a_sizeOH(i+shift+1)) { - inc_trees(i).write(a_addr_hi >> (i+1), a_inc_trees(i) + UInt(1)) - } + inc_bytes_wen := a_mask } + inc_trees_wen := a_sizeOH >> (shift+1) } - when (a.bits.opcode === TLMessages.PutFullData || a.bits.opcode === TLMessages.PutPartialData) { + when (a.opcode === TLMessages.PutFullData || a.opcode === TLMessages.PutPartialData) { + shadow_wen := a.mask for (i <- 0 until beatBytes) { - val set = Wire(new ByteMonitor) val busy = a_inc(i) - a_dec(i) - (!a_first).asUInt - set.valid := busy === UInt(0) - set.value := a.bits.data(8*(i+1)-1, 8*i) - when (a.bits.mask(i)) { - shadow(i).write(a_addr_hi, set) - printf("P 0x%x := 0x%x #%d\n", a_addr_hi << shift | UInt(i), set.value, busy) + val byte = a.data(8*(i+1)-1, 8*i) + when (a.mask(i)) { + printf("P 0x%x := 0x%x #%d\n", a_addr_hi << shift | UInt(i), byte, busy) } } } } + val a_waddr = Mux(wipe, wipeIndex, a_addr_hi) + for (i <- 0 until beatBytes) { + val data = Wire(new ByteMonitor) + val busy = a_inc(i) - a_dec(i) - (!a_first).asUInt + data.valid := Mux(wipe, Bool(false), busy === UInt(0)) + data.value := a.data(8*(i+1)-1, 8*i) + when (shadow_wen(i)) { + shadow(i).write(a_waddr, data) + } + } + + for (i <- 0 until beatBytes) { + val data = Mux(wipe, UInt(0), a_inc_bytes(i) + UInt(1)) + when (inc_bytes_wen(i)) { + inc_bytes(i).write(a_waddr, data) + } + } + + for (i <- 0 until inc_trees.size) { + val data = Mux(wipe, UInt(0), a_inc_trees(i) + UInt(1)) + when (inc_trees_wen(i)) { + inc_trees(i).write(a_waddr >> (i+1), data) + } + } + // Process D access responses - val d = RegNext(io.out(0).d) - val d_bypass = a.valid && d.bits.source === a.bits.source - val d_flight = Mux(d_bypass, a_flight, flight(d.bits.source)) - val d_beats1 = edge.numBeats1(d.bits) - val d_size = edge.size(d.bits) + val d = RegNext(out.d.bits) + val d_fire = Reg(next = out.d.fire(), init = Bool(false)) + val d_bypass = a_fire && d.source === a.source + val d_flight = Mux(d_bypass, a_flight, flight(d.source)) + val d_beats1 = edge.numBeats1(d) + val d_size = edge.size(d) val d_sizeOH = UIntToOH(d_size) val d_counter = RegInit(UInt(0, width = maxLgBeats)) val d_counter1 = d_counter - UInt(1) val d_first = d_counter === UInt(0) val d_last = d_counter === UInt(1) || d_beats1 === UInt(0) - val d_base = d_flight.base + val d_base = d_flight.base // !!! not a register => can't be absorbed val d_addr_hi = d_base >> shift | (d_beats1 & ~d_counter1) val d_mask = edge.mask(d_base, d_size) @@ -144,44 +188,36 @@ class TLRAMModel extends LazyModule val d_dec = d_dec_bytes.map(_ + d_dec_tree) val d_shadow = shadow.map(_.read(d_addr_hi)) - when (d.fire()) { + when (d_fire) { assert (d_size === d_flight.size) d_counter := Mux(d_first, d_beats1, d_counter1) when (d_flight.opcode === TLMessages.Hint) { - assert (d.bits.opcode === TLMessages.HintAck) + assert (d.opcode === TLMessages.HintAck) } // Decreaes the per-byte flight counter for the whole transaction when (d_last && d_flight.opcode =/= TLMessages.Hint) { when (d_size <= UInt(shift)) { - for (i <- 0 until beatBytes) { - when (d_mask(i)) { - dec_bytes(i).write(d_addr_hi, d_dec_bytes(i) + UInt(1)) - } - } - } - for (i <- 0 until dec_trees.size) { - when (d_sizeOH(i+shift+1)) { - dec_trees(i).write(d_addr_hi >> (i+1), d_dec_trees(i) + UInt(1)) - } + dec_bytes_wen := d_mask } + dec_trees_wen := d_sizeOH >> (shift+1) } when (d_flight.opcode === TLMessages.PutFullData || d_flight.opcode === TLMessages.PutPartialData) { - assert (d.bits.opcode === TLMessages.AccessAck) + assert (d.opcode === TLMessages.AccessAck) } // !!! atomics when (d_flight.opcode === TLMessages.Get) { - assert (d.bits.opcode === TLMessages.AccessAckData) + assert (d.opcode === TLMessages.AccessAckData) for (i <- 0 until beatBytes) { - val got = d.bits.data(8*(i+1)-1, 8*i) + val got = d.data(8*(i+1)-1, 8*i) val shadow = Wire(init = d_shadow(i)) when (d_mask(i)) { when (!shadow.valid) { - printf("G 0x%x := undefined due to concurrent accesses\n", d_addr_hi << shift | UInt(i)) + printf("G 0x%x := undefined\n", d_addr_hi << shift | UInt(i)) } .otherwise { printf("G 0x%x := 0x%x\n", d_addr_hi << shift | UInt(i), shadow.value) assert (shadow.value === got) @@ -190,5 +226,20 @@ class TLRAMModel extends LazyModule } } } + + val d_waddr = Mux(wipe, wipeIndex, d_addr_hi) + for (i <- 0 until beatBytes) { + val data = Mux(wipe, UInt(0), d_dec_bytes(i) + UInt(1)) + when (dec_bytes_wen(i)) { + dec_bytes(i).write(d_waddr, data) + } + } + + for (i <- 0 until dec_trees.size) { + val data = Mux(wipe, UInt(0), d_dec_trees(i) + UInt(1)) + when (dec_trees_wen(i)) { + dec_trees(i).write(d_waddr >> (i+1), data) + } + } } }