TLRAM: add support for ECC (#1304)
This commit is contained in:
parent
12583af4a8
commit
4ba8acb4aa
@ -22,10 +22,9 @@ abstract class DiplomaticSRAM(
|
||||
def mask: List[Boolean] = bigBits(address.mask >> log2Ceil(beatBytes))
|
||||
|
||||
// Use single-ported memory with a per-lane write enable (a lane defaults to one byte)
|
||||
def makeSinglePortedByteWriteSeqMem(size: Int) = {
|
||||
def makeSinglePortedByteWriteSeqMem(size: Int, lanes: Int = beatBytes, bits: Int = 8) = {
|
||||
// We require the address range to include an entire beat (for the write mask)
|
||||
require ((address.mask & (beatBytes-1)) == beatBytes-1)
|
||||
val mem = SeqMem(size, Vec(beatBytes, Bits(width = 8)))
|
||||
val mem = SeqMem(size, Vec(lanes, Bits(width = bits)))
|
||||
devName.foreach(n => mem.suggestName(n.split("-").last))
|
||||
mem
|
||||
}
|
||||
|
@ -13,10 +13,16 @@ class TLRAM(
|
||||
cacheable: Boolean = true,
|
||||
executable: Boolean = true,
|
||||
beatBytes: Int = 4,
|
||||
eccBytes: Int = 1,
|
||||
devName: Option[String] = None,
|
||||
errors: Seq[AddressSet] = Nil)
|
||||
errors: Seq[AddressSet] = Nil,
|
||||
code: Code = new IdentityCode)
|
||||
(implicit p: Parameters) extends DiplomaticSRAM(address, beatBytes, devName)
|
||||
{
|
||||
require (eccBytes >= 1 && isPow2(eccBytes))
|
||||
require (beatBytes >= 1 && isPow2(beatBytes))
|
||||
require (eccBytes <= beatBytes, s"TLRAM eccBytes (${eccBytes}) > beatBytes (${beatBytes}). Use a WidthWidget=>Fragmenter=>SRAM if you need high density and narrow ECC; it will do bursts efficiently")
|
||||
|
||||
val node = TLManagerNode(Seq(TLManagerPortParameters(
|
||||
Seq(TLManagerParameters(
|
||||
address = List(address) ++ errors,
|
||||
@ -33,46 +39,127 @@ class TLRAM(
|
||||
lazy val module = new LazyModuleImp(this) {
|
||||
val (in, edge) = node.in(0)
|
||||
|
||||
val width = code.width(eccBytes*8)
|
||||
val lanes = beatBytes/eccBytes
|
||||
val addrBits = (mask zip edge.addr_hi(in.a.bits).toBools).filter(_._1).map(_._2)
|
||||
val a_legal = address.contains(in.a.bits.address)
|
||||
val memAddress = Cat(addrBits.reverse)
|
||||
val mem = makeSinglePortedByteWriteSeqMem(1 << addrBits.size)
|
||||
val mem = makeSinglePortedByteWriteSeqMem(1 << addrBits.size, lanes, width)
|
||||
|
||||
val d_full = RegInit(Bool(false))
|
||||
val d_read = Reg(Bool())
|
||||
val d_size = Reg(UInt())
|
||||
val d_source = Reg(UInt())
|
||||
val d_data = Wire(UInt())
|
||||
val d_legal = Reg(Bool())
|
||||
/* This block uses a two-stage pipeline; A=>D
|
||||
* Both stages vie for access to the single SRAM port.
|
||||
* Stage D has absolute priority over stage A.
|
||||
* - read-modify-writeback for sub-lane access happens here
|
||||
* - writeback of correctable data happens here
|
||||
* - both actions may occur concurrently
|
||||
* Stage A has lower priority and will stall if blocked
|
||||
* - read operations happen here
|
||||
* - full-lane write operations happen here
|
||||
*/
|
||||
|
||||
// Flow control
|
||||
when (in.d.fire()) { d_full := Bool(false) }
|
||||
when (in.a.fire()) { d_full := Bool(true) }
|
||||
in.d.valid := d_full
|
||||
in.a.ready := in.d.ready || !d_full
|
||||
// D stage registers from A
|
||||
val d_full = RegInit(Bool(false))
|
||||
val d_ram_valid = RegInit(Bool(false)) // true if we just read-out from SRAM
|
||||
val d_size = Reg(UInt())
|
||||
val d_source = Reg(UInt())
|
||||
val d_legal = Reg(Bool())
|
||||
val d_read = Reg(Bool())
|
||||
val d_address = Reg(UInt(width = addrBits.size))
|
||||
val d_rmw_mask = Reg(UInt(width = beatBytes))
|
||||
val d_rmw_data = Reg(UInt(width = 8*beatBytes))
|
||||
|
||||
// Decode raw unregistered SRAM output
|
||||
val d_raw_data = Wire(Vec(lanes, Bits(width = width)))
|
||||
val d_decoded = d_raw_data.map(lane => code.decode(lane))
|
||||
val d_corrected = Cat(d_decoded.map(_.corrected).reverse)
|
||||
val d_uncorrected = Cat(d_decoded.map(_.uncorrected).reverse)
|
||||
val d_correctable = d_decoded.map(_.correctable)
|
||||
val d_uncorrectable = d_decoded.map(_.uncorrectable)
|
||||
val d_need_fix = d_correctable.reduce(_ || _)
|
||||
val d_error = d_uncorrectable.reduce(_ || _)
|
||||
|
||||
// What does D-stage want to write-back?
|
||||
val d_wb_data = Vec(Seq.tabulate(beatBytes) { i =>
|
||||
val upd = d_rmw_mask(i)
|
||||
val rmw = d_rmw_data (8*(i+1)-1, 8*i)
|
||||
val fix = d_corrected(8*(i+1)-1, 8*i) // safe to use, because D-stage write-back always wins arbitration
|
||||
Mux(upd, rmw, fix)
|
||||
}.grouped(eccBytes).map(lane => Cat(lane.reverse)).toList)
|
||||
val (d_wb_lanes, d_wb_poison) = Seq.tabulate(lanes) { i =>
|
||||
val upd = d_rmw_mask(eccBytes*(i+1)-1, eccBytes*i)
|
||||
(upd.orR || d_correctable(i),
|
||||
!upd.andR && d_uncorrectable(i)) // sub-lane writes should not correct uncorrectable
|
||||
}.unzip
|
||||
val d_wb = d_rmw_mask.orR || (d_ram_valid && d_need_fix)
|
||||
|
||||
// Extend the validity of the SRAM read-out: capture it while d_ram_valid is set so it remains available on later cycles
|
||||
val d_held_data = RegEnable(d_corrected, d_ram_valid)
|
||||
val d_held_error = RegEnable(d_error, d_ram_valid)
|
||||
|
||||
in.d.bits := edge.AccessAck(d_source, d_size, !d_legal)
|
||||
// avoid data-bus Mux
|
||||
in.d.bits.data := d_data
|
||||
in.d.bits.opcode := Mux(d_read, TLMessages.AccessAckData, TLMessages.AccessAck)
|
||||
in.d.bits.param := UInt(0)
|
||||
in.d.bits.size := d_size
|
||||
in.d.bits.source := d_source
|
||||
in.d.bits.sink := UInt(0)
|
||||
// It is safe to use uncorrected data here because d_pause withholds the response whenever the SRAM read-out needs correction
|
||||
in.d.bits.data := Mux(d_ram_valid, d_uncorrected, d_held_data)
|
||||
in.d.bits.error := !d_legal || Mux(d_ram_valid, d_error, d_held_error)
|
||||
|
||||
val read = in.a.bits.opcode === TLMessages.Get
|
||||
val rdata = Wire(Vec(beatBytes, Bits(width = 8)))
|
||||
val wdata = Vec.tabulate(beatBytes) { i => in.a.bits.data(8*(i+1)-1, 8*i) }
|
||||
d_data := Cat(rdata.reverse)
|
||||
// Formulate a response only when SRAM output is unused or correct
|
||||
val d_pause = d_read && d_ram_valid && d_need_fix
|
||||
in.d.valid := d_full && !d_pause
|
||||
in.a.ready := !d_full || (in.d.ready && !d_pause && !d_wb)
|
||||
|
||||
val a_legal = Bool(errors.isEmpty) || address.contains(in.a.bits.address)
|
||||
val a_address = Cat(addrBits.reverse)
|
||||
val a_read = in.a.bits.opcode === TLMessages.Get
|
||||
val a_data = Vec(Seq.tabulate(lanes) { i => in.a.bits.data(eccBytes*8*(i+1)-1, eccBytes*8*i) })
|
||||
|
||||
/*
|
||||
val a_sublane = Seq.tabulate(lanes) { i =>
|
||||
val upd = in.a.bits.mask(eccBytes*(i+1)-1, eccBytes*i)
|
||||
upd.orR && !upd.andR
|
||||
}.reduce(_ || _)
|
||||
*/
|
||||
val a_sublane = if (eccBytes == 1) Bool(false) else
|
||||
in.a.bits.opcode === TLMessages.PutPartialData ||
|
||||
in.a.bits.size < UInt(log2Ceil(eccBytes))
|
||||
val a_ren = a_read || a_sublane
|
||||
val a_lanes = Seq.tabulate(lanes) { i => in.a.bits.mask(eccBytes*(i+1)-1, eccBytes*i).orR }
|
||||
|
||||
when (in.d.fire()) { d_full := Bool(false) }
|
||||
d_ram_valid := Bool(false)
|
||||
d_rmw_mask := UInt(0)
|
||||
when (in.a.fire()) {
|
||||
d_read := read
|
||||
d_size := in.a.bits.size
|
||||
d_source := in.a.bits.source
|
||||
d_legal := a_legal
|
||||
d_full := Bool(true)
|
||||
d_ram_valid := a_ren && a_legal
|
||||
d_size := in.a.bits.size
|
||||
d_source := in.a.bits.source
|
||||
d_legal := a_legal
|
||||
d_read := a_read
|
||||
d_address := a_address
|
||||
d_rmw_mask := UInt(0)
|
||||
when (!a_read && a_sublane) {
|
||||
d_rmw_mask := in.a.bits.mask
|
||||
d_rmw_data := in.a.bits.data
|
||||
}
|
||||
d_held_error:= Bool(false)
|
||||
}
|
||||
|
||||
// exactly this pattern is required to get a RWM memory
|
||||
when (in.a.fire() && !read && a_legal) {
|
||||
mem.write(memAddress, wdata, in.a.bits.mask.toBools)
|
||||
}
|
||||
val ren = in.a.fire() && read
|
||||
rdata := mem.readAndHold(memAddress, ren)
|
||||
// SRAM arbitration
|
||||
val a_fire = in.a.fire() && a_legal
|
||||
val wen = d_wb || (a_fire && !a_ren)
|
||||
// val ren = !d_wb && (a_fire && a_ren)
|
||||
val ren = !wen && a_fire // help Chisel infer a RW-port
|
||||
|
||||
val addr = Mux(d_wb, d_address, a_address)
|
||||
val sel = Mux(d_wb, Vec(d_wb_lanes), Vec(a_lanes))
|
||||
val dat = Mux(d_wb, d_wb_data, a_data)
|
||||
val poison = Mux(d_wb, Vec(d_wb_poison), Vec.fill(lanes) { Bool(false) })
|
||||
val coded = Vec((dat zip poison) map { case (d, p) =>
|
||||
if (code.canDetect) code.encode(d, p) else code.encode(d)
|
||||
})
|
||||
|
||||
d_raw_data := mem.read(addr, ren)
|
||||
when (wen) { mem.write(addr, coded, sel) }
|
||||
|
||||
// Tie off unused channels
|
||||
in.b.valid := Bool(false)
|
||||
@ -88,10 +175,12 @@ object TLRAM
|
||||
cacheable: Boolean = true,
|
||||
executable: Boolean = true,
|
||||
beatBytes: Int = 4,
|
||||
eccBytes: Int = 1,
|
||||
devName: Option[String] = None,
|
||||
errors: Seq[AddressSet] = Nil)(implicit p: Parameters): TLInwardNode =
|
||||
errors: Seq[AddressSet] = Nil,
|
||||
code: Code = new IdentityCode)(implicit p: Parameters): TLInwardNode =
|
||||
{
|
||||
val ram = LazyModule(new TLRAM(address, cacheable, executable, beatBytes, devName, errors))
|
||||
val ram = LazyModule(new TLRAM(address, cacheable, executable, beatBytes, eccBytes, devName, errors, code))
|
||||
ram.node
|
||||
}
|
||||
}
|
||||
@ -115,3 +204,20 @@ class TLRAMSimpleTest(ramBeatBytes: Int, txns: Int = 5000, timeout: Int = 500000
|
||||
val dut = Module(LazyModule(new TLRAMSimple(ramBeatBytes, txns)).module)
|
||||
io.finished := dut.io.finished
|
||||
}
|
||||
|
||||
/** Test harness that drives an ECC-enabled TLRAM with fuzzed traffic.
  *
  * @param ramBeatBytes passed to the TLRAM as its `beatBytes`
  * @param eccBytes     passed to the TLRAM as its `eccBytes`
  * @param txns         passed to the TLFuzzer (presumably the number of transactions to issue -- confirm against TLFuzzer)
  */
class TLRAMECC(ramBeatBytes: Int, eccBytes: Int, txns: Int)(implicit p: Parameters) extends LazyModule {
|
||||
// Traffic source for the test.
val fuzz = LazyModule(new TLFuzzer(txns))
|
||||
// NOTE(review): the label "SRAMSimple" looks copied from the non-ECC test -- confirm whether a distinct name is wanted.
val model = LazyModule(new TLRAMModel("SRAMSimple"))
|
||||
// Device under test: a TLRAM over AddressSet(0x0, 0x3ff) protected with SECDEDCode.
val ram = LazyModule(new TLRAM(AddressSet(0x0, 0x3ff), beatBytes = ramBeatBytes, eccBytes = eccBytes, code = new SECDEDCode))
|
||||
|
||||
// Chain fuzzer, RAM model, a TLDelayer(0.25) and the RAM together.
ram.node := TLDelayer(0.25) := model.node := fuzz.node
|
||||
|
||||
lazy val module = new LazyModuleImp(this) with UnitTestModule {
|
||||
// The harness reports completion exactly when the fuzzer does.
io.finished := fuzz.module.io.finished
|
||||
}
|
||||
}
|
||||
|
||||
/** Unit test wrapper around [[TLRAMECC]].
  *
  * @param ramBeatBytes forwarded to TLRAMECC
  * @param eccBytes     forwarded to TLRAMECC
  * @param txns         forwarded to TLRAMECC (default 5000)
  * @param timeout      passed to the UnitTest base class (default 500000)
  */
class TLRAMECCTest(ramBeatBytes: Int, eccBytes: Int, txns: Int = 5000, timeout: Int = 500000)(implicit p: Parameters) extends UnitTest(timeout) {
|
||||
// Instantiate the lazy harness and elaborate its module as the device under test.
val dut = Module(LazyModule(new TLRAMECC(ramBeatBytes, eccBytes, txns)).module)
|
||||
// The test finishes when the harness reports it has finished.
io.finished := dut.io.finished
|
||||
}
|
||||
|
@ -50,7 +50,10 @@ class WithTLSimpleUnitTests extends Config((site, here, up) => {
|
||||
Module(new TLRR1Test( txns= 3*txns, timeout=timeout)),
|
||||
Module(new TLRAMRationalCrossingTest(txns= 3*txns, timeout=timeout)),
|
||||
Module(new TLRAMAsyncCrossingTest( txns= 5*txns, timeout=timeout)),
|
||||
Module(new TLRAMAtomicAutomataTest( txns=10*txns, timeout=timeout)) ) }
|
||||
Module(new TLRAMAtomicAutomataTest( txns=10*txns, timeout=timeout)),
|
||||
Module(new TLRAMECCTest(8, 4, txns=15*txns, timeout=timeout)),
|
||||
Module(new TLRAMECCTest(4, 1, txns=15*txns, timeout=timeout)),
|
||||
Module(new TLRAMECCTest(1, 1, txns=15*txns, timeout=timeout)) ) }
|
||||
})
|
||||
|
||||
class WithTLWidthUnitTests extends Config((site, here, up) => {
|
||||
|
Loading…
Reference in New Issue
Block a user