1
0

Merge pull request #678 from ucb-bar/rammodel-atomics

RAMModel atomics
This commit is contained in:
Wesley W. Terpstra 2017-04-14 18:08:33 -07:00 committed by GitHub
commit 153178ac4f
13 changed files with 167 additions and 19 deletions

View File

@ -138,7 +138,7 @@ class TLAsyncCrossing(depth: Int = 8, sync: Int = 3)(implicit p: Parameters) ext
import unittest._
class TLRAMAsyncCrossing(implicit p: Parameters) extends LazyModule {
val model = LazyModule(new TLRAMModel)
val model = LazyModule(new TLRAMModel("AsyncCrossing"))
val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff)))
val fuzz = LazyModule(new TLFuzzer(5000))
val cross = LazyModule(new TLAsyncCrossing)

View File

@ -295,7 +295,7 @@ import unittest._
class TLRAMAtomicAutomata()(implicit p: Parameters) extends LazyModule {
val fuzz = LazyModule(new TLFuzzer(5000))
val model = LazyModule(new TLRAMModel)
val model = LazyModule(new TLRAMModel("AtomicAutomata"))
val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff)))
model.node := fuzz.node

View File

@ -0,0 +1,65 @@
// See LICENSE.SiFive for license details.
package uncore.tilelink2
import Chisel._
import config._
import diplomacy._
import uncore.tilelink2._
import TLMessages._
import TLPermissions._
class Atomics(params: TLBundleParameters) extends Module
{
val io = new Bundle {
val write = Bool().flip // ignore opcode
val a = new TLBundleA(params).flip
val data_in = UInt(width = params.dataBits).flip
val data_out = UInt(width = params.dataBits)
}
// Arithmetic, what to do
val adder = io.a.param(2)
val unsigned = io.a.param(1)
val take_max = io.a.param(0)
val signBit = io.a.mask & Cat(UInt(1), ~io.a.mask >> 1)
val inv_d = Mux(adder, io.data_in, ~io.data_in)
val sum = (FillInterleaved(8, io.a.mask) & io.a.data) + inv_d
def sign(x: UInt): Bool = (Cat(x.toBools.grouped(8).map(_.last).toList.reverse) & signBit).orR()
val sign_a = sign(io.a.data)
val sign_d = sign(io.data_in)
val sign_s = sign(sum)
val a_bigger_uneq = unsigned === sign_a // result if high bits are unequal
val a_bigger = Mux(sign_a === sign_d, !sign_s, a_bigger_uneq)
val pick_a = take_max === a_bigger
// Logical, what to do
val lut = Vec(Seq(
UInt(0x6), // XOR
UInt(0xe), // OR
UInt(0x8), // AND
UInt(0xc)))( // SWAP
io.a.param(1,0))
val logical = Cat((io.a.data.toBools zip io.data_in.toBools).map { case (a, d) =>
lut(Cat(a, d))
}.reverse)
// Operation, what to do? (0=d, 1=a, 2=sum, 3=logical)
val select = Mux(io.write, UInt(1), Vec(Seq(
UInt(1), // PutFullData
UInt(1), // PutPartialData
Mux(adder, UInt(2), Mux(pick_a, UInt(1), UInt(0))), // ArithmeticData
UInt(3), // LogicalData
UInt(0), // Get
UInt(0), // Hint
UInt(0), // Acquire
UInt(0)))( // Overwrite
io.a.opcode))
// Only the masked bytes can be modified
val selects = io.a.mask.toBools.map(b => Mux(b, select, UInt(0)))
io.data_out := Cat(selects.zipWithIndex.map { case (s, i) =>
Vec(Seq(io.data_in, io.a.data, sum, logical).map(_((i + 1) * 8 - 1, i * 8)))(s)
}.reverse)
}

View File

@ -264,7 +264,7 @@ import unittest._
class TLRAMFragmenter(ramBeatBytes: Int, maxSize: Int)(implicit p: Parameters) extends LazyModule {
val fuzz = LazyModule(new TLFuzzer(5000))
val model = LazyModule(new TLRAMModel)
val model = LazyModule(new TLRAMModel("Fragmenter"))
val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff), beatBytes = ramBeatBytes))
model.node := fuzz.node

View File

@ -112,7 +112,7 @@ import unittest._
class TLRAMHintHandler()(implicit p: Parameters) extends LazyModule {
val fuzz = LazyModule(new TLFuzzer(5000))
val model = LazyModule(new TLRAMModel)
val model = LazyModule(new TLRAMModel("HintHandler"))
val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff)))
model.node := fuzz.node

View File

@ -5,7 +5,7 @@ package uncore.tilelink2
import Chisel._
import config._
import diplomacy._
import util.GenericParameterizedBundle
import util.{GenericParameterizedBundle, CRC}
// We detect concurrent puts that put memory into an undefined state.
// put0, put0Ack, put1, put1Ack => ok: defined
@ -45,6 +45,7 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule
val addressBits = log2Up(endAddress)
val countBits = log2Up(endSourceId)
val sizeBits = edge.bundle.sizeBits
val divisor = CRC.CRC_16F_4_2
// Reset control logic
val wipeIndex = RegInit(UInt(0, width = log2Ceil(endAddressHi) + 1))
@ -156,13 +157,31 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule
}
val a_waddr = Mux(wipe, wipeIndex, a_addr_hi)
val a_shadow = shadow.map(_.read(a_waddr))
val a_known_old = !(Cat(a_shadow.map(!_.valid).reverse) & a_mask).orR
val alu = Module(new Atomics(a.params))
alu.io.write := Bool(false)
alu.io.a := a
alu.io.data_in := Cat(a_shadow.map(_.value).reverse)
val crc = Mem(endSourceId, UInt(width = 16))
val crc_valid = Mem(endSourceId, Bool())
val a_crc_acc = Mux(a_first, UInt(0), crc(a.source))
val a_crc_new = Cat(a_shadow.zipWithIndex.map { case (z, i) => Mux(a_mask(i), z.value, UInt(0)) }.reverse)
val a_crc = CRC(divisor, Cat(a_crc_acc, a_crc_new), 16 + beatBytes*8)
val a_crc_valid = a_known_old && Mux(a_first, Bool(true), crc_valid(a.source))
when (a_fire) {
crc.write(a.source, a_crc)
crc_valid.write(a.source, a_crc_valid)
}
for (i <- 0 until beatBytes) {
val data = Wire(new TLRAMModel.ByteMonitor(params))
val busy = a_inc(i) =/= a_dec(i) + (!a_first).asUInt
val amo = a.opcode === TLMessages.ArithmeticData || a.opcode === TLMessages.LogicalData
data.valid := Mux(wipe, Bool(false), (!busy || a_fifo) && !amo)
// !!! calculate the AMO?
data.value := a.data(8*(i+1)-1, 8*i)
val beat_amo = a.size <= UInt(log2Ceil(beatBytes))
data.valid := Mux(wipe, Bool(false), (!busy || a_fifo) && (!amo || (a_known_old && beat_amo)))
data.value := alu.io.data_out(8*(i+1)-1, 8*i)
when (shadow_wen(i)) {
shadow(i).write(a_waddr, data)
}
@ -206,7 +225,22 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule
val d_shadow = shadow.map(_.read(d_addr_hi))
val d_valid = valid(d.source)
// CRC check
val d_crc_reg = Reg(UInt(width = 16))
val d_crc_acc = Mux(d_first, UInt(0), d_crc_reg)
val d_crc_new = FillInterleaved(8, d_mask) & d.data
val d_crc = CRC(divisor, Cat(d_crc_acc, d_crc_new), 16 + beatBytes*8)
val crc_bypass = if (edge.manager.minLatency > 0) Bool(false) else a_fire && a.source === d.source
val d_crc_valid = Mux(crc_bypass, a_crc_valid, crc_valid.read(d.source))
val d_crc_check = Mux(crc_bypass, a_crc, crc.read(d.source))
val d_no_race_reg = Reg(Bool())
val d_no_race = Wire(init = d_no_race_reg)
when (d_fire) {
d_crc_reg := d_crc
d_no_race_reg := d_no_race
// Check the response is correct
assert (d_size === d_flight.size)
// addr_lo is allowed to differ
@ -261,11 +295,24 @@ class TLRAMModel(log: String = "")(implicit p: Parameters) extends LazyModule
printf(", undefined (concurrent completed put)\n")
} .otherwise {
printf("\n")
when (shadow.value =/= got) { printf("EXPECTED: 0x%x\n", shadow.value) }
assert (shadow.value === got)
}
}
}
}
when (d_flight.opcode === TLMessages.ArithmeticData || d_flight.opcode === TLMessages.LogicalData) {
val race = (d_inc zip d_dec) map { case (i, d) => i - d =/= UInt(1) }
when (d_first) { d_no_race := Bool(true) }
when ((Cat(race.reverse) & d_mask).orR) { d_no_race := Bool(false) }
when (d_last) {
val must_match = d_crc_valid && (d_fifo || (d_valid && d_no_race))
printf(log + " crc = 0x%x %d\n", d_crc, must_match.asUInt)
when (must_match && d_crc =/= d_crc_check) { printf("EXPECTED: 0x%x\n", d_crc_check) }
assert (!must_match || d_crc === d_crc_check)
}
}
}
val d_waddr = Mux(wipe, wipeIndex, d_addr_hi)

View File

@ -149,10 +149,10 @@ class TLRationalCrossing(direction: RationalDirection = Symmetric)(implicit p: P
/** Synthesizeable unit tests */
import unittest._
class TLRAMRationalCrossingSource(implicit p: Parameters) extends LazyModule {
class TLRAMRationalCrossingSource(name: String)(implicit p: Parameters) extends LazyModule {
val node = TLRationalOutputNode()
val fuzz = LazyModule(new TLFuzzer(5000))
val model = LazyModule(new TLRAMModel)
val model = LazyModule(new TLRAMModel(name))
model.node := fuzz.node
node := TLRationalCrossingSource()(TLDelayer(0.25)(model.node))
@ -180,19 +180,19 @@ class TLRAMRationalCrossingSink(direction: RationalDirection)(implicit p: Parame
}
class TLRAMRationalCrossing(implicit p: Parameters) extends LazyModule {
val sym_fast_source = LazyModule(new TLRAMRationalCrossingSource)
val sym_fast_source = LazyModule(new TLRAMRationalCrossingSource("RationalCrossing sym_fast"))
val sym_slow_sink = LazyModule(new TLRAMRationalCrossingSink(Symmetric))
sym_slow_sink.node := sym_fast_source.node
val sym_slow_source = LazyModule(new TLRAMRationalCrossingSource)
val sym_slow_source = LazyModule(new TLRAMRationalCrossingSource("RationalCrossing sym_slow"))
val sym_fast_sink = LazyModule(new TLRAMRationalCrossingSink(Symmetric))
sym_fast_sink.node := sym_slow_source.node
val fix_fast_source = LazyModule(new TLRAMRationalCrossingSource)
val fix_fast_source = LazyModule(new TLRAMRationalCrossingSource("RationalCrossing fast"))
val fix_slow_sink = LazyModule(new TLRAMRationalCrossingSink(FastToSlow))
fix_slow_sink.node := fix_fast_source.node
val fix_slow_source = LazyModule(new TLRAMRationalCrossingSource)
val fix_slow_source = LazyModule(new TLRAMRationalCrossingSource("RationalCrossing slow"))
val fix_fast_sink = LazyModule(new TLRAMRationalCrossingSink(SlowToFast))
fix_fast_sink.node := fix_slow_source.node

View File

@ -91,7 +91,7 @@ import unittest._
class TLRAMSimple(ramBeatBytes: Int)(implicit p: Parameters) extends LazyModule {
val fuzz = LazyModule(new TLFuzzer(5000))
val model = LazyModule(new TLRAMModel)
val model = LazyModule(new TLRAMModel("SRAMSimple"))
val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff), beatBytes = ramBeatBytes))
model.node := fuzz.node

View File

@ -67,7 +67,7 @@ import unittest._
class TLRAMZeroDelay(ramBeatBytes: Int)(implicit p: Parameters) extends LazyModule {
val fuzz = LazyModule(new TLFuzzer(5000))
val model = LazyModule(new TLRAMModel)
val model = LazyModule(new TLRAMModel("ZeroDelay"))
val ram = LazyModule(new TLTestRAM(AddressSet(0x0, 0x3ff), beatBytes = ramBeatBytes))
model.node := fuzz.node

View File

@ -176,7 +176,7 @@ import unittest._
class TLRAMWidthWidget(first: Int, second: Int)(implicit p: Parameters) extends LazyModule {
val fuzz = LazyModule(new TLFuzzer(5000))
val model = LazyModule(new TLRAMModel)
val model = LazyModule(new TLRAMModel("WidthWidget"))
val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff)))
model.node := fuzz.node

View File

@ -215,7 +215,7 @@ import unittest._
class TLRAMXbar(nManagers: Int)(implicit p: Parameters) extends LazyModule {
val fuzz = LazyModule(new TLFuzzer(5000))
val model = LazyModule(new TLRAMModel)
val model = LazyModule(new TLRAMModel("Xbar"))
val xbar = LazyModule(new TLXbar)
model.node := fuzz.node

View File

@ -35,7 +35,8 @@ class WithTLSimpleUnitTests extends Config((site, here, up) => {
Module(new uncore.tilelink2.TLRR0Test),
Module(new uncore.tilelink2.TLRR1Test),
Module(new uncore.tilelink2.TLRAMRationalCrossingTest),
Module(new uncore.tilelink2.TLRAMAsyncCrossingTest) ) }
Module(new uncore.tilelink2.TLRAMAsyncCrossingTest),
Module(new uncore.tilelink2.TLRAMAtomicAutomataTest) ) }
})
class WithTLWidthUnitTests extends Config((site, here, up) => {

View File

@ -0,0 +1,35 @@
// See LICENSE.SiFive for license details.
package util
import Chisel._
object CRC
{
// A divisor of 0x1d5 is interpretted to be x^8 + x^7 + x^6 + x^4 + x^2 + 1
// Let n be the highest term in the divisor; n=8 in 0x1d5.
// Then, this function calculates c mod d, returning an n-bit UInt.
// coefficient.width must be <= width
def apply(divisor: BigInt, coefficient: UInt, width: Integer): UInt = {
require (divisor > 0 && divisor.testBit(0))
require (width > 0)
assert (coefficient >> width === UInt(0))
val n = log2Floor(divisor)
val m = width
if (m <= n) return coefficient
// Initialize the reduction matrix
val array = Array.tabulate(m) { BigInt(1) << _ }
// Reduce the matrix of terms larger than n
for {
i <- (n until m).reverse
j <- 0 to n
if divisor.testBit(j)
} array(i-(n-j)) ^= array(i)
// Construct the circuit
Cat(Seq.tabulate(n) { i => (UInt(array(i)) & coefficient).xorR } .reverse)
}
// Find more great CRC polynomials here: https://users.ece.cmu.edu/~koopman/crc/
val CRC_16F_4_2 = BigInt(0x1a2eb) // HD=4 for <32751 bits and HD=6 for <93 bits
}