9804bdc34e
When we first implemented TL, we thought this was helpful, because it made WidthWidgets stateless in all cases. However, it put too much burden on all other masters and slaves, none of which benefitted from this signal. Furthermore, even with addr_lo, WidthWidgets were information lossy because when they widen, they have no information about what to fill in the new high bits of addr_lo.
312 lines
13 KiB
Scala
312 lines
13 KiB
Scala
// See LICENSE.Berkeley for license details.
|
|
// See LICENSE.SiFive for license details.
|
|
|
|
package freechips.rocketchip.rocket
|
|
|
|
import Chisel._
|
|
import Chisel.ImplicitConversions._
|
|
import freechips.rocketchip.config.Parameters
|
|
import freechips.rocketchip.coreplex.RocketTilesKey
|
|
import freechips.rocketchip.diplomacy._
|
|
import freechips.rocketchip.tile._
|
|
import freechips.rocketchip.tilelink._
|
|
import freechips.rocketchip.util._
|
|
|
|
case class ICacheParams(
|
|
nSets: Int = 64,
|
|
nWays: Int = 4,
|
|
rowBits: Int = 128,
|
|
nTLBEntries: Int = 32,
|
|
cacheIdBits: Int = 0,
|
|
tagECC: Code = new IdentityCode,
|
|
dataECC: Code = new IdentityCode,
|
|
itimAddr: Option[BigInt] = None,
|
|
blockBytes: Int = 64,
|
|
latency: Int = 2,
|
|
fetchBytes: Int = 4) extends L1CacheParams {
|
|
def replacement = new RandomReplacement(nWays)
|
|
}
|
|
|
|
trait HasL1ICacheParameters extends HasL1CacheParameters with HasCoreParameters {
|
|
val cacheParams = tileParams.icache.get
|
|
}
|
|
|
|
class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1ICacheParameters {
|
|
val addr = UInt(width = vaddrBits)
|
|
}
|
|
|
|
class ICache(val icacheParams: ICacheParams, val hartid: Int)(implicit p: Parameters) extends LazyModule {
|
|
lazy val module = new ICacheModule(this)
|
|
val masterNode = TLClientNode(TLClientParameters(name = s"Core ${hartid} ICache"))
|
|
|
|
val size = icacheParams.nSets * icacheParams.nWays * icacheParams.blockBytes
|
|
val device = new SimpleDevice("itim", Seq("sifive,itim0"))
|
|
val slaveNode = icacheParams.itimAddr.map { itimAddr =>
|
|
val wordBytes = icacheParams.fetchBytes
|
|
TLManagerNode(Seq(TLManagerPortParameters(
|
|
Seq(TLManagerParameters(
|
|
address = Seq(AddressSet(itimAddr, size-1)),
|
|
resources = device.reg("mem"),
|
|
regionType = RegionType.UNCACHED,
|
|
executable = true,
|
|
supportsPutFull = TransferSizes(1, wordBytes),
|
|
supportsPutPartial = TransferSizes(1, wordBytes),
|
|
supportsGet = TransferSizes(1, wordBytes),
|
|
fifoId = Some(0))), // requests handled in FIFO order
|
|
beatBytes = wordBytes,
|
|
minLatency = 1)))
|
|
}
|
|
}
|
|
|
|
class ICacheResp(outer: ICache) extends Bundle {
|
|
val data = UInt(width = outer.icacheParams.fetchBytes*8)
|
|
val ae = Bool()
|
|
|
|
override def cloneType = new ICacheResp(outer).asInstanceOf[this.type]
|
|
}
|
|
|
|
class ICacheBundle(outer: ICache) extends CoreBundle()(outer.p) {
|
|
val hartid = UInt(INPUT, hartIdLen)
|
|
val req = Decoupled(new ICacheReq).flip
|
|
val s1_paddr = UInt(INPUT, paddrBits) // delayed one cycle w.r.t. req
|
|
val s2_vaddr = UInt(INPUT, vaddrBits) // delayed two cycles w.r.t. req
|
|
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
|
|
val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
|
|
|
|
val resp = Valid(new ICacheResp(outer))
|
|
val invalidate = Bool(INPUT)
|
|
val tl_out = outer.masterNode.bundleOut
|
|
val tl_in = outer.slaveNode.map(_.bundleIn)
|
|
}
|
|
|
|
// get a tile-specific property without breaking deduplication
|
|
object GetPropertyByHartId {
|
|
def apply[T <: Data](tiles: Seq[RocketTileParams], f: RocketTileParams => Option[T], hartId: UInt): T = {
|
|
PriorityMux(tiles.zipWithIndex.collect { case (t, i) if f(t).nonEmpty => (hartId === i) -> f(t).get })
|
|
}
|
|
}
|
|
|
|
class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
|
with HasL1ICacheParameters {
|
|
override val cacheParams = outer.icacheParams // Use the local parameters
|
|
|
|
val io = new ICacheBundle(outer)
|
|
val edge_out = outer.masterNode.edgesOut.head
|
|
val tl_out = io.tl_out.head
|
|
val edge_in = outer.slaveNode.map(_.edgesIn.head)
|
|
val tl_in = io.tl_in.map(_.head)
|
|
|
|
val tECC = cacheParams.tagECC
|
|
val dECC = cacheParams.dataECC
|
|
|
|
require(isPow2(nSets) && isPow2(nWays))
|
|
require(!usingVM || pgIdxBits >= untagBits)
|
|
|
|
val scratchpadOn = RegInit(false.B)
|
|
val scratchpadMax = tl_in.map(tl => Reg(UInt(width = log2Ceil(nSets * (nWays - 1)))))
|
|
def lineInScratchpad(line: UInt) = scratchpadMax.map(scratchpadOn && line <= _).getOrElse(false.B)
|
|
def addrMaybeInScratchpad(addr: UInt) = if (outer.icacheParams.itimAddr.isEmpty) false.B else {
|
|
val base = GetPropertyByHartId(p(RocketTilesKey), _.icache.flatMap(_.itimAddr.map(_.U)), io.hartid)
|
|
addr >= base && addr < base + outer.size
|
|
}
|
|
def addrInScratchpad(addr: UInt) = addrMaybeInScratchpad(addr) && lineInScratchpad(addr(untagBits+log2Ceil(nWays)-1, blockOffBits))
|
|
def scratchpadWay(addr: UInt) = addr.extract(untagBits+log2Ceil(nWays)-1, untagBits)
|
|
def scratchpadWayValid(way: UInt) = way < nWays - 1
|
|
def scratchpadLine(addr: UInt) = addr(untagBits+log2Ceil(nWays)-1, blockOffBits)
|
|
val s0_slaveValid = tl_in.map(_.a.fire()).getOrElse(false.B)
|
|
val s1_slaveValid = RegNext(s0_slaveValid, false.B)
|
|
val s2_slaveValid = RegNext(s1_slaveValid, false.B)
|
|
val s3_slaveValid = RegNext(false.B)
|
|
|
|
val s1_valid = Reg(init=Bool(false))
|
|
val s1_tag_hit = Wire(Vec(nWays, Bool()))
|
|
val s1_hit = s1_tag_hit.reduce(_||_) || Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))
|
|
val s2_valid = RegNext(s1_valid && !io.s1_kill, Bool(false))
|
|
val s2_hit = RegNext(s1_hit)
|
|
|
|
val invalidated = Reg(Bool())
|
|
val refill_valid = RegInit(false.B)
|
|
val s2_miss = s2_valid && !s2_hit && !io.s2_kill && !RegNext(refill_valid)
|
|
val refill_addr = RegEnable(io.s1_paddr, s1_valid && !(refill_valid || s2_miss))
|
|
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
|
|
val refill_idx = refill_addr(untagBits-1,blockOffBits)
|
|
|
|
io.req.ready := !(tl_out.d.fire() || s0_slaveValid || s3_slaveValid)
|
|
val s0_valid = io.req.fire()
|
|
val s0_vaddr = io.req.bits.addr
|
|
s1_valid := s0_valid
|
|
|
|
val (_, _, refill_done, refill_cnt) = edge_out.count(tl_out.d)
|
|
tl_out.d.ready := !s3_slaveValid
|
|
require (edge_out.manager.minLatency > 0)
|
|
|
|
val repl_way = if (isDM) UInt(0) else {
|
|
// pick a way that is not used by the scratchpad
|
|
val v0 = LFSR16(tl_out.a.fire())(log2Up(nWays)-1,0)
|
|
var v = v0
|
|
for (i <- log2Ceil(nWays) - 1 to 0 by -1) {
|
|
val mask = nWays - (BigInt(1) << (i + 1))
|
|
v = v | (lineInScratchpad(Cat(v0 | mask.U, refill_idx)) << i)
|
|
}
|
|
assert(!lineInScratchpad(Cat(v, refill_idx)))
|
|
v
|
|
}
|
|
|
|
val tag_array = SeqMem(nSets, Vec(nWays, UInt(width = tECC.width(1 + tagBits))))
|
|
val tag_rdata = tag_array.read(s0_vaddr(untagBits-1,blockOffBits), !refill_done && s0_valid)
|
|
val accruedRefillError = Reg(Bool())
|
|
val refillError = tl_out.d.bits.error || (refill_cnt > 0 && accruedRefillError)
|
|
when (refill_done) {
|
|
val enc_tag = tECC.encode(Cat(refillError, refill_tag))
|
|
tag_array.write(refill_idx, Vec.fill(nWays)(enc_tag), Seq.tabulate(nWays)(repl_way === _))
|
|
}
|
|
|
|
val vb_array = Reg(init=Bits(0, nSets*nWays))
|
|
when (tl_out.d.fire()) {
|
|
accruedRefillError := refillError
|
|
// clear bit when refill starts so hit-under-miss doesn't fetch bad data
|
|
vb_array := vb_array.bitSet(Cat(repl_way, refill_idx), refill_done && !invalidated)
|
|
}
|
|
val invalidate = Wire(init = io.invalidate)
|
|
when (invalidate) {
|
|
vb_array := Bits(0)
|
|
invalidated := Bool(true)
|
|
}
|
|
|
|
val s1_tag_disparity = Wire(Vec(nWays, Bool()))
|
|
val s1_tl_error = Wire(Vec(nWays, Bool()))
|
|
val wordBits = outer.icacheParams.fetchBytes*8
|
|
val s1_dout = Wire(Vec(nWays, UInt(width = dECC.width(wordBits))))
|
|
|
|
val s0_slaveAddr = tl_in.map(_.a.bits.address).getOrElse(0.U)
|
|
val s1s3_slaveAddr = Reg(UInt(width = log2Ceil(outer.size)))
|
|
val s1s3_slaveData = Reg(UInt(width = wordBits))
|
|
|
|
for (i <- 0 until nWays) {
|
|
val s1_idx = io.s1_paddr(untagBits-1,blockOffBits)
|
|
val s1_tag = io.s1_paddr(tagBits+untagBits-1,untagBits)
|
|
val scratchpadHit = scratchpadWayValid(i) &&
|
|
Mux(s1_slaveValid,
|
|
lineInScratchpad(scratchpadLine(s1s3_slaveAddr)) && scratchpadWay(s1s3_slaveAddr) === i,
|
|
addrInScratchpad(io.s1_paddr) && scratchpadWay(io.s1_paddr) === i)
|
|
val s1_vb = vb_array(Cat(UInt(i), s1_idx)) && !s1_slaveValid
|
|
val enc_tag = tECC.decode(tag_rdata(i))
|
|
val (tl_error, tag) = Split(enc_tag.uncorrected, tagBits)
|
|
val tagMatch = s1_vb && tag === s1_tag
|
|
s1_tag_disparity(i) := s1_vb && enc_tag.error
|
|
s1_tl_error(i) := tagMatch && tl_error.toBool
|
|
s1_tag_hit(i) := tagMatch || scratchpadHit
|
|
}
|
|
assert(!(s1_valid || s1_slaveValid) || PopCount(s1_tag_hit zip s1_tag_disparity map { case (h, d) => h && !d }) <= 1)
|
|
|
|
require(tl_out.d.bits.data.getWidth % wordBits == 0)
|
|
val data_arrays = Seq.fill(tl_out.d.bits.data.getWidth / wordBits) { SeqMem(nSets * refillCycles, Vec(nWays, UInt(width = dECC.width(wordBits)))) }
|
|
for ((data_array, i) <- data_arrays zipWithIndex) {
|
|
def wordMatch(addr: UInt) = addr.extract(log2Ceil(tl_out.d.bits.data.getWidth/8)-1, log2Ceil(wordBits/8)) === i
|
|
def row(addr: UInt) = addr(untagBits-1, blockOffBits-log2Ceil(refillCycles))
|
|
val s0_ren = (s0_valid && wordMatch(s0_vaddr)) || (s0_slaveValid && wordMatch(s0_slaveAddr))
|
|
val wen = (tl_out.d.fire() && !invalidated) || (s3_slaveValid && wordMatch(s1s3_slaveAddr))
|
|
val mem_idx = Mux(tl_out.d.fire(), (refill_idx << log2Ceil(refillCycles)) | refill_cnt,
|
|
Mux(s3_slaveValid, row(s1s3_slaveAddr),
|
|
Mux(s0_slaveValid, row(s0_slaveAddr),
|
|
row(s0_vaddr))))
|
|
when (wen) {
|
|
val data = Mux(s3_slaveValid, s1s3_slaveData, tl_out.d.bits.data(wordBits*(i+1)-1, wordBits*i))
|
|
val way = Mux(s3_slaveValid, scratchpadWay(s1s3_slaveAddr), repl_way)
|
|
data_array.write(mem_idx, Vec.fill(nWays)(dECC.encode(data)), (0 until nWays).map(way === _))
|
|
}
|
|
val dout = data_array.read(mem_idx, !wen && s0_ren)
|
|
when (wordMatch(Mux(s1_slaveValid, s1s3_slaveAddr, io.s1_paddr))) {
|
|
s1_dout := dout
|
|
}
|
|
}
|
|
|
|
// output signals
|
|
outer.icacheParams.latency match {
|
|
case 1 =>
|
|
require(tECC.isInstanceOf[IdentityCode])
|
|
require(dECC.isInstanceOf[IdentityCode])
|
|
require(outer.icacheParams.itimAddr.isEmpty)
|
|
io.resp.bits.data := Mux1H(s1_tag_hit, s1_dout)
|
|
io.resp.bits.ae := s1_tl_error.asUInt.orR
|
|
io.resp.valid := s1_valid && s1_hit
|
|
|
|
case 2 =>
|
|
val s2_tag_hit = RegEnable(s1_tag_hit, s1_valid || s1_slaveValid)
|
|
val s2_dout = RegEnable(s1_dout, s1_valid || s1_slaveValid)
|
|
val s2_way_mux = Mux1H(s2_tag_hit, s2_dout)
|
|
|
|
val s2_tag_disparity = RegEnable(s1_tag_disparity, s1_valid || s1_slaveValid).asUInt.orR
|
|
val s2_tl_error = RegEnable(s1_tl_error.asUInt.orR, s1_valid || s1_slaveValid)
|
|
val s2_data_decoded = dECC.decode(s2_way_mux)
|
|
val s2_disparity = s2_tag_disparity || s2_data_decoded.error
|
|
when (s2_valid && s2_disparity) { invalidate := true }
|
|
|
|
io.resp.bits.data := s2_data_decoded.uncorrected
|
|
io.resp.bits.ae := s2_tl_error
|
|
io.resp.valid := s2_valid && s2_hit && !s2_disparity
|
|
|
|
tl_in.map { tl =>
|
|
val respValid = RegInit(false.B)
|
|
tl.a.ready := !(tl_out.d.valid || s1_slaveValid || s2_slaveValid || s3_slaveValid || respValid)
|
|
val s1_a = RegEnable(tl.a.bits, s0_slaveValid)
|
|
when (s0_slaveValid) {
|
|
val a = tl.a.bits
|
|
s1s3_slaveAddr := tl.a.bits.address
|
|
s1s3_slaveData := tl.a.bits.data
|
|
when (edge_in.get.hasData(a)) {
|
|
val enable = scratchpadWayValid(scratchpadWay(a.address))
|
|
when (!lineInScratchpad(scratchpadLine(a.address))) {
|
|
scratchpadMax.get := scratchpadLine(a.address)
|
|
when (enable) { invalidate := true }
|
|
}
|
|
scratchpadOn := enable
|
|
}
|
|
}
|
|
|
|
assert(!s2_valid || RegNext(RegNext(s0_vaddr)) === io.s2_vaddr)
|
|
when (!(tl.a.valid || s1_slaveValid || s2_slaveValid || respValid)
|
|
&& s2_valid && s2_data_decoded.correctable && !s2_tag_disparity) {
|
|
// handle correctable errors on CPU accesses to the scratchpad.
|
|
// if there is an in-flight slave-port access to the scratchpad,
|
|
// report the a miss but don't correct the error (as there is
|
|
// a structural hazard on s1s3_slaveData/s1s3_slaveAddress).
|
|
s3_slaveValid := true
|
|
s1s3_slaveData := s2_data_decoded.corrected
|
|
s1s3_slaveAddr := Cat(OHToUInt(s2_tag_hit), io.s2_vaddr(untagBits-1, log2Ceil(wordBits/8)), s1s3_slaveAddr(log2Ceil(wordBits/8)-1, 0))
|
|
}
|
|
|
|
respValid := s2_slaveValid || (respValid && !tl.d.ready)
|
|
when (s2_slaveValid) {
|
|
when (edge_in.get.hasData(s1_a) || s2_data_decoded.correctable) { s3_slaveValid := true }
|
|
def byteEn(i: Int) = !(edge_in.get.hasData(s1_a) && s1_a.mask(i))
|
|
s1s3_slaveData := (0 until wordBits/8).map(i => Mux(byteEn(i), s2_data_decoded.corrected, s1s3_slaveData)(8*(i+1)-1, 8*i)).asUInt
|
|
}
|
|
|
|
tl.d.valid := respValid
|
|
tl.d.bits := Mux(edge_in.get.hasData(s1_a),
|
|
edge_in.get.AccessAck(s1_a),
|
|
edge_in.get.AccessAck(s1_a, UInt(0)))
|
|
tl.d.bits.data := s1s3_slaveData
|
|
|
|
// Tie off unused channels
|
|
tl.b.valid := false
|
|
tl.c.ready := true
|
|
tl.e.ready := true
|
|
}
|
|
}
|
|
tl_out.a.valid := s2_miss && !refill_valid
|
|
tl_out.a.bits := edge_out.Get(
|
|
fromSource = UInt(0),
|
|
toAddress = (refill_addr >> blockOffBits) << blockOffBits,
|
|
lgSize = lgCacheBlockBytes)._2
|
|
tl_out.b.ready := Bool(true)
|
|
tl_out.c.valid := Bool(false)
|
|
tl_out.e.valid := Bool(false)
|
|
assert(!(tl_out.a.valid && addrMaybeInScratchpad(tl_out.a.bits.address)))
|
|
|
|
when (!refill_valid) { invalidated := false.B }
|
|
when (tl_out.a.fire()) { refill_valid := true.B }
|
|
when (refill_done) { refill_valid := false.B}
|
|
}
|