diff --git a/src/main/scala/uncore/tilelink2/AsyncCrossing.scala b/src/main/scala/uncore/tilelink2/AsyncCrossing.scala index 7736e7af..bd2788ea 100644 --- a/src/main/scala/uncore/tilelink2/AsyncCrossing.scala +++ b/src/main/scala/uncore/tilelink2/AsyncCrossing.scala @@ -138,7 +138,7 @@ class TLAsyncCrossing(depth: Int = 8, sync: Int = 3)(implicit p: Parameters) ext import unittest._ class TLRAMAsyncCrossing(implicit p: Parameters) extends LazyModule { - val model = LazyModule(new TLRAMModel) + val model = LazyModule(new TLRAMModel("AsyncCrossing")) val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff))) val fuzz = LazyModule(new TLFuzzer(5000)) val cross = LazyModule(new TLAsyncCrossing) diff --git a/src/main/scala/uncore/tilelink2/AtomicAutomata.scala b/src/main/scala/uncore/tilelink2/AtomicAutomata.scala index 538b7565..ba9a444d 100644 --- a/src/main/scala/uncore/tilelink2/AtomicAutomata.scala +++ b/src/main/scala/uncore/tilelink2/AtomicAutomata.scala @@ -295,7 +295,7 @@ import unittest._ class TLRAMAtomicAutomata()(implicit p: Parameters) extends LazyModule { val fuzz = LazyModule(new TLFuzzer(5000)) - val model = LazyModule(new TLRAMModel) + val model = LazyModule(new TLRAMModel("AtomicAutomata")) val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff))) model.node := fuzz.node diff --git a/src/main/scala/uncore/tilelink2/Atomics.scala b/src/main/scala/uncore/tilelink2/Atomics.scala new file mode 100644 index 00000000..ccfc1a2d --- /dev/null +++ b/src/main/scala/uncore/tilelink2/Atomics.scala @@ -0,0 +1,65 @@ +// See LICENSE.SiFive for license details. 
+ +package uncore.tilelink2 + +import Chisel._ +import config._ +import diplomacy._ +import uncore.tilelink2._ +import TLMessages._ +import TLPermissions._ + +class Atomics(params: TLBundleParameters) extends Module +{ + val io = new Bundle { + val write = Bool().flip // ignore opcode + val a = new TLBundleA(params).flip + val data_in = UInt(width = params.dataBits).flip + val data_out = UInt(width = params.dataBits) + } + + // Arithmetic, what to do + val adder = io.a.param(2) + val unsigned = io.a.param(1) + val take_max = io.a.param(0) + + val signBit = io.a.mask & Cat(UInt(1), ~io.a.mask >> 1) + val inv_d = Mux(adder, io.data_in, ~io.data_in) + val sum = (FillInterleaved(8, io.a.mask) & io.a.data) + inv_d + def sign(x: UInt): Bool = (Cat(x.toBools.grouped(8).map(_.last).toList.reverse) & signBit).orR() + val sign_a = sign(io.a.data) + val sign_d = sign(io.data_in) + val sign_s = sign(sum) + val a_bigger_uneq = unsigned === sign_a // result if high bits are unequal + val a_bigger = Mux(sign_a === sign_d, !sign_s, a_bigger_uneq) + val pick_a = take_max === a_bigger + + // Logical, what to do + val lut = Vec(Seq( + UInt(0x6), // XOR + UInt(0xe), // OR + UInt(0x8), // AND + UInt(0xc)))( // SWAP + io.a.param(1,0)) + val logical = Cat((io.a.data.toBools zip io.data_in.toBools).map { case (a, d) => + lut(Cat(a, d)) + }.reverse) + + // Operation, what to do? 
(0=d, 1=a, 2=sum, 3=logical) + val select = Mux(io.write, UInt(1), Vec(Seq( + UInt(1), // PutFullData + UInt(1), // PutPartialData + Mux(adder, UInt(2), Mux(pick_a, UInt(1), UInt(0))), // ArithmeticData + UInt(3), // LogicalData + UInt(0), // Get + UInt(0), // Hint + UInt(0), // Acquire + UInt(0)))( // Overwrite + io.a.opcode)) + + // Only the masked bytes can be modified + val selects = io.a.mask.toBools.map(b => Mux(b, select, UInt(0))) + io.data_out := Cat(selects.zipWithIndex.map { case (s, i) => + Vec(Seq(io.data_in, io.a.data, sum, logical).map(_((i + 1) * 8 - 1, i * 8)))(s) + }.reverse) +} diff --git a/src/main/scala/uncore/tilelink2/Fragmenter.scala b/src/main/scala/uncore/tilelink2/Fragmenter.scala index fd06fe87..7676aa2e 100644 --- a/src/main/scala/uncore/tilelink2/Fragmenter.scala +++ b/src/main/scala/uncore/tilelink2/Fragmenter.scala @@ -264,7 +264,7 @@ import unittest._ class TLRAMFragmenter(ramBeatBytes: Int, maxSize: Int)(implicit p: Parameters) extends LazyModule { val fuzz = LazyModule(new TLFuzzer(5000)) - val model = LazyModule(new TLRAMModel) + val model = LazyModule(new TLRAMModel("Fragmenter")) val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff), beatBytes = ramBeatBytes)) model.node := fuzz.node diff --git a/src/main/scala/uncore/tilelink2/HintHandler.scala b/src/main/scala/uncore/tilelink2/HintHandler.scala index 0775aa52..5e153e0c 100644 --- a/src/main/scala/uncore/tilelink2/HintHandler.scala +++ b/src/main/scala/uncore/tilelink2/HintHandler.scala @@ -112,7 +112,7 @@ import unittest._ class TLRAMHintHandler()(implicit p: Parameters) extends LazyModule { val fuzz = LazyModule(new TLFuzzer(5000)) - val model = LazyModule(new TLRAMModel) + val model = LazyModule(new TLRAMModel("HintHandler")) val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff))) model.node := fuzz.node diff --git a/src/main/scala/uncore/tilelink2/RAMModel.scala b/src/main/scala/uncore/tilelink2/RAMModel.scala index fb4e4dfd..2befed32 100644 --- 
a/src/main/scala/uncore/tilelink2/RAMModel.scala +++ b/src/main/scala/uncore/tilelink2/RAMModel.scala @@ -5,7 +5,7 @@ package uncore.tilelink2 import Chisel._ import config._ import diplomacy._ -import util.GenericParameterizedBundle +import util.{GenericParameterizedBundle, CRC} // We detect concurrent puts that put memory into an undefined state. // put0, put0Ack, put1, put1Ack => ok: defined @@ -45,6 +45,7 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule val addressBits = log2Up(endAddress) val countBits = log2Up(endSourceId) val sizeBits = edge.bundle.sizeBits + val divisor = CRC.CRC_16F_4_2 // Reset control logic val wipeIndex = RegInit(UInt(0, width = log2Ceil(endAddressHi) + 1)) @@ -156,13 +157,31 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule } val a_waddr = Mux(wipe, wipeIndex, a_addr_hi) + val a_shadow = shadow.map(_.read(a_waddr)) + val a_known_old = !(Cat(a_shadow.map(!_.valid).reverse) & a_mask).orR + val alu = Module(new Atomics(a.params)) + alu.io.write := Bool(false) + alu.io.a := a + alu.io.data_in := Cat(a_shadow.map(_.value).reverse) + + val crc = Mem(endSourceId, UInt(width = 16)) + val crc_valid = Mem(endSourceId, Bool()) + val a_crc_acc = Mux(a_first, UInt(0), crc(a.source)) + val a_crc_new = Cat(a_shadow.zipWithIndex.map { case (z, i) => Mux(a_mask(i), z.value, UInt(0)) }.reverse) + val a_crc = CRC(divisor, Cat(a_crc_acc, a_crc_new), 16 + beatBytes*8) + val a_crc_valid = a_known_old && Mux(a_first, Bool(true), crc_valid(a.source)) + when (a_fire) { + crc.write(a.source, a_crc) + crc_valid.write(a.source, a_crc_valid) + } + for (i <- 0 until beatBytes) { val data = Wire(new TLRAMModel.ByteMonitor(params)) val busy = a_inc(i) =/= a_dec(i) + (!a_first).asUInt val amo = a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData - data.valid := Mux(wipe, Bool(false), (!busy || a_fifo) && !amo) - // !!! calculate the AMO? 
- data.value := a.data(8*(i+1)-1, 8*i) + val beat_amo = a.size <= UInt(log2Ceil(beatBytes)) + data.valid := Mux(wipe, Bool(false), (!busy || a_fifo) && (!amo || (a_known_old && beat_amo))) + data.value := alu.io.data_out(8*(i+1)-1, 8*i) when (shadow_wen(i)) { shadow(i).write(a_waddr, data) } @@ -206,7 +225,22 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule val d_shadow = shadow.map(_.read(d_addr_hi)) val d_valid = valid(d.source) + // CRC check + val d_crc_reg = Reg(UInt(width = 16)) + val d_crc_acc = Mux(d_first, UInt(0), d_crc_reg) + val d_crc_new = FillInterleaved(8, d_mask) & d.data + val d_crc = CRC(divisor, Cat(d_crc_acc, d_crc_new), 16 + beatBytes*8) + val crc_bypass = if (edge.manager.minLatency > 0) Bool(false) else a_fire && a.source === d.source + val d_crc_valid = Mux(crc_bypass, a_crc_valid, crc_valid.read(d.source)) + val d_crc_check = Mux(crc_bypass, a_crc, crc.read(d.source)) + + val d_no_race_reg = Reg(Bool()) + val d_no_race = Wire(init = d_no_race_reg) + when (d_fire) { + d_crc_reg := d_crc + d_no_race_reg := d_no_race + // Check the response is correct assert (d_size === d_flight.size) // addr_lo is allowed to differ @@ -261,11 +295,24 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule printf(", undefined (concurrent completed put)\n") } .otherwise { printf("\n") + when (shadow.value =/= got) { printf("EXPECTED: 0x%x\n", shadow.value) } assert (shadow.value === got) } } } } + + when (d_flight.opcode === TLMessages.ArithmeticData || d_flight.opcode === TLMessages.LogicalData) { + val race = (d_inc zip d_dec) map { case (i, d) => i - d =/= UInt(1) } + when (d_first) { d_no_race := Bool(true) } + when ((Cat(race.reverse) & d_mask).orR) { d_no_race := Bool(false) } + when (d_last) { + val must_match = d_crc_valid && (d_fifo || (d_valid && d_no_race)) + printf(log + " crc = 0x%x %d\n", d_crc, must_match.asUInt) + when (must_match && d_crc =/= d_crc_check) { printf("EXPECTED: 0x%x\n", 
d_crc_check) } + assert (!must_match || d_crc === d_crc_check) + } + } } val d_waddr = Mux(wipe, wipeIndex, d_addr_hi) diff --git a/src/main/scala/uncore/tilelink2/RationalCrossing.scala b/src/main/scala/uncore/tilelink2/RationalCrossing.scala index a736269c..e13bbbbf 100644 --- a/src/main/scala/uncore/tilelink2/RationalCrossing.scala +++ b/src/main/scala/uncore/tilelink2/RationalCrossing.scala @@ -149,10 +149,10 @@ class TLRationalCrossing(direction: RationalDirection = Symmetric)(implicit p: P /** Synthesizeable unit tests */ import unittest._ -class TLRAMRationalCrossingSource(implicit p: Parameters) extends LazyModule { +class TLRAMRationalCrossingSource(name: String)(implicit p: Parameters) extends LazyModule { val node = TLRationalOutputNode() val fuzz = LazyModule(new TLFuzzer(5000)) - val model = LazyModule(new TLRAMModel) + val model = LazyModule(new TLRAMModel(name)) model.node := fuzz.node node := TLRationalCrossingSource()(TLDelayer(0.25)(model.node)) @@ -180,19 +180,19 @@ class TLRAMRationalCrossingSink(direction: RationalDirection)(implicit p: Parame } class TLRAMRationalCrossing(implicit p: Parameters) extends LazyModule { - val sym_fast_source = LazyModule(new TLRAMRationalCrossingSource) + val sym_fast_source = LazyModule(new TLRAMRationalCrossingSource("RationalCrossing sym_fast")) val sym_slow_sink = LazyModule(new TLRAMRationalCrossingSink(Symmetric)) sym_slow_sink.node := sym_fast_source.node - val sym_slow_source = LazyModule(new TLRAMRationalCrossingSource) + val sym_slow_source = LazyModule(new TLRAMRationalCrossingSource("RationalCrossing sym_slow")) val sym_fast_sink = LazyModule(new TLRAMRationalCrossingSink(Symmetric)) sym_fast_sink.node := sym_slow_source.node - val fix_fast_source = LazyModule(new TLRAMRationalCrossingSource) + val fix_fast_source = LazyModule(new TLRAMRationalCrossingSource("RationalCrossing fast")) val fix_slow_sink = LazyModule(new TLRAMRationalCrossingSink(FastToSlow)) fix_slow_sink.node := fix_fast_source.node - 
val fix_slow_source = LazyModule(new TLRAMRationalCrossingSource) + val fix_slow_source = LazyModule(new TLRAMRationalCrossingSource("RationalCrossing slow")) val fix_fast_sink = LazyModule(new TLRAMRationalCrossingSink(SlowToFast)) fix_fast_sink.node := fix_slow_source.node diff --git a/src/main/scala/uncore/tilelink2/SRAM.scala b/src/main/scala/uncore/tilelink2/SRAM.scala index d287c094..2e9d9d35 100644 --- a/src/main/scala/uncore/tilelink2/SRAM.scala +++ b/src/main/scala/uncore/tilelink2/SRAM.scala @@ -91,7 +91,7 @@ import unittest._ class TLRAMSimple(ramBeatBytes: Int)(implicit p: Parameters) extends LazyModule { val fuzz = LazyModule(new TLFuzzer(5000)) - val model = LazyModule(new TLRAMModel) + val model = LazyModule(new TLRAMModel("SRAMSimple")) val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff), beatBytes = ramBeatBytes)) model.node := fuzz.node diff --git a/src/main/scala/uncore/tilelink2/TestRAM.scala b/src/main/scala/uncore/tilelink2/TestRAM.scala index 74dd2f75..bab75674 100644 --- a/src/main/scala/uncore/tilelink2/TestRAM.scala +++ b/src/main/scala/uncore/tilelink2/TestRAM.scala @@ -67,7 +67,7 @@ import unittest._ class TLRAMZeroDelay(ramBeatBytes: Int)(implicit p: Parameters) extends LazyModule { val fuzz = LazyModule(new TLFuzzer(5000)) - val model = LazyModule(new TLRAMModel) + val model = LazyModule(new TLRAMModel("ZeroDelay")) val ram = LazyModule(new TLTestRAM(AddressSet(0x0, 0x3ff), beatBytes = ramBeatBytes)) model.node := fuzz.node diff --git a/src/main/scala/uncore/tilelink2/WidthWidget.scala b/src/main/scala/uncore/tilelink2/WidthWidget.scala index e066722a..36e154b3 100644 --- a/src/main/scala/uncore/tilelink2/WidthWidget.scala +++ b/src/main/scala/uncore/tilelink2/WidthWidget.scala @@ -176,7 +176,7 @@ import unittest._ class TLRAMWidthWidget(first: Int, second: Int)(implicit p: Parameters) extends LazyModule { val fuzz = LazyModule(new TLFuzzer(5000)) - val model = LazyModule(new TLRAMModel) + val model = LazyModule(new 
TLRAMModel("WidthWidget")) val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff))) model.node := fuzz.node diff --git a/src/main/scala/uncore/tilelink2/Xbar.scala b/src/main/scala/uncore/tilelink2/Xbar.scala index 7d122b3b..377198b9 100644 --- a/src/main/scala/uncore/tilelink2/Xbar.scala +++ b/src/main/scala/uncore/tilelink2/Xbar.scala @@ -215,7 +215,7 @@ import unittest._ class TLRAMXbar(nManagers: Int)(implicit p: Parameters) extends LazyModule { val fuzz = LazyModule(new TLFuzzer(5000)) - val model = LazyModule(new TLRAMModel) + val model = LazyModule(new TLRAMModel("Xbar")) val xbar = LazyModule(new TLXbar) model.node := fuzz.node diff --git a/src/main/scala/unittest/Configs.scala b/src/main/scala/unittest/Configs.scala index ac825820..4fdfad95 100644 --- a/src/main/scala/unittest/Configs.scala +++ b/src/main/scala/unittest/Configs.scala @@ -35,7 +35,8 @@ class WithTLSimpleUnitTests extends Config((site, here, up) => { Module(new uncore.tilelink2.TLRR0Test), Module(new uncore.tilelink2.TLRR1Test), Module(new uncore.tilelink2.TLRAMRationalCrossingTest), - Module(new uncore.tilelink2.TLRAMAsyncCrossingTest) ) } + Module(new uncore.tilelink2.TLRAMAsyncCrossingTest), + Module(new uncore.tilelink2.TLRAMAtomicAutomataTest) ) } }) class WithTLWidthUnitTests extends Config((site, here, up) => { diff --git a/src/main/scala/util/CRC.scala b/src/main/scala/util/CRC.scala new file mode 100644 index 00000000..8dc2ab21 --- /dev/null +++ b/src/main/scala/util/CRC.scala @@ -0,0 +1,35 @@ +// See LICENSE.SiFive for license details. + +package util + +import Chisel._ + +object CRC +{ + // A divisor of 0x1d5 is interpreted to be x^8 + x^7 + x^6 + x^4 + x^2 + 1 + // Let n be the highest term in the divisor; n=8 in 0x1d5. + // Then, this function calculates c mod d, returning an n-bit UInt. 
+ // coefficient.width must be <= width + def apply(divisor: BigInt, coefficient: UInt, width: Integer): UInt = { + require (divisor > 0 && divisor.testBit(0)) + require (width > 0) + assert (coefficient >> width === UInt(0)) + val n = log2Floor(divisor) + val m = width + if (m <= n) return coefficient + + // Initialize the reduction matrix + val array = Array.tabulate(m) { BigInt(1) << _ } + // Reduce the matrix of terms larger than n + for { + i <- (n until m).reverse + j <- 0 to n + if divisor.testBit(j) + } array(i-(n-j)) ^= array(i) + // Construct the circuit + Cat(Seq.tabulate(n) { i => (UInt(array(i)) & coefficient).xorR } .reverse) + } + + // Find more great CRC polynomials here: https://users.ece.cmu.edu/~koopman/crc/ + val CRC_16F_4_2 = BigInt(0x1a2eb) // HD=4 for <32751 bits and HD=6 for <93 bits +}