// See LICENSE for license details.
package rocket
import Chisel._
import uncore._
import Util._
case object StoreDataQueueDepth extends Field[Int]
case object ReplayQueueDepth extends Field[Int]
case object NMSHRs extends Field[Int]
case object LRSCCycles extends Field[Int]
case object NDTLBEntries extends Field[Int]
abstract trait L1HellaCacheParameters extends L1CacheParameters {
val indexmsb = untagBits-1
val indexlsb = blockOffBits
val offsetmsb = indexlsb-1
val offsetlsb = wordOffBits
val doNarrowRead = coreDataBits * nWays % rowBits == 0
val encDataBits = code.width(coreDataBits)
val encRowBits = encDataBits*rowWords
}
abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters
abstract class L1HellaCacheModule extends Module with L1HellaCacheParameters
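// generates the byte write mask and lane-replicated write data for a sub-word
// store, based on the access type (byte/half/word/double) and low address bits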
class StoreGen(typ: Bits, addr: Bits, dat: Bits)
{
val byte = typ === MT_B || typ === MT_BU
val half = typ === MT_H || typ === MT_HU
val word = typ === MT_W || typ === MT_WU
def mask =
Mux(byte, Bits( 1) << addr(2,0),
Mux(half, Bits( 3) << Cat(addr(2,1), Bits(0,1)),
Mux(word, Bits( 15) << Cat(addr(2), Bits(0,2)),
Bits(255))))
def data =
Mux(byte, Fill(8, dat( 7,0)),
Mux(half, Fill(4, dat(15,0)),
wordData))
lazy val wordData =
Mux(word, Fill(2, dat(31,0)),
dat)
}
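// selects the addressed sub-word from a 64-bit load result and
// sign- or zero-extends it according to the access type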
class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool)
{
val t = new StoreGen(typ, addr, dat)
val sign = typ === MT_B || typ === MT_H || typ === MT_W || typ === MT_D
val wordShift = Mux(addr(2), dat(63,32), dat(31,0))
val word = Cat(Mux(t.word, Fill(32, sign && wordShift(31)), dat(63,32)), wordShift)
val halfShift = Mux(addr(1), word(31,16), word(15,0))
val half = Cat(Mux(t.half, Fill(48, sign && halfShift(15)), word(63,16)), halfShift)
val byteShift = Mux(zero, UInt(0), Mux(addr(0), half(15,8), half(7,0)))
val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift)
}
trait HasCoreData extends CoreBundle {
val data = Bits(width = coreDataBits)
}
class HellaCacheReqInternal extends CoreBundle {
val kill = Bool()
val typ = Bits(width = MT_SZ)
val phys = Bool()
val addr = UInt(width = coreMaxAddrBits)
val tag = Bits(width = coreDCacheReqTagBits)
val cmd = Bits(width = M_SZ)
}
class HellaCacheReq extends HellaCacheReqInternal
with HasCoreData
class HellaCacheResp extends CoreBundle
with HasCoreData {
val nack = Bool() // comes 2 cycles after req.fire
val replay = Bool()
val typ = Bits(width = 3)
val has_data = Bool()
val data_subword = Bits(width = coreDataBits)
val tag = Bits(width = coreDCacheReqTagBits)
val cmd = Bits(width = 4)
val addr = UInt(width = coreMaxAddrBits)
val store_data = Bits(width = coreDataBits)
}
class AlignmentExceptions extends Bundle {
val ld = Bool()
val st = Bool()
}
class HellaCacheExceptions extends Bundle {
val ma = new AlignmentExceptions
val pf = new AlignmentExceptions
}
// interface between D$ and processor/DTLB
class HellaCacheIO extends CoreBundle {
val req = Decoupled(new HellaCacheReq)
val resp = Valid(new HellaCacheResp).flip
val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip
val xcpt = (new HellaCacheExceptions).asInput
val ptw = new TLBPTWIO().flip
val ordered = Bool(INPUT)
}
trait HasSDQId extends CoreBundle {
val sdq_id = UInt(width = log2Up(params(StoreDataQueueDepth)))
}
trait HasMissInfo extends CoreBundle with L1HellaCacheParameters {
val tag_match = Bool()
val old_meta = new L1Metadata
val way_en = Bits(width = nWays)
}
class MSHRReq extends HellaCacheReqInternal with HasMissInfo with HasCoreData
class MSHRReqInternal extends HellaCacheReqInternal with HasMissInfo with HasSDQId
class Replay extends HellaCacheReqInternal with L1HellaCacheParameters with HasCoreData
class ReplayInternal extends HellaCacheReqInternal with L1HellaCacheParameters with HasSDQId
class DataReadReq extends L1HellaCacheBundle {
val way_en = Bits(width = nWays)
val addr = Bits(width = untagBits)
}
class DataWriteReq extends DataReadReq {
val wmask = Bits(width = rowWords)
val data = Bits(width = encRowBits)
}
class L1MetaReadReq extends MetaReadReq {
val tag = Bits(width = tagBits)
}
class L1MetaWriteReq extends MetaWriteReq[L1Metadata](new L1Metadata)
object L1Metadata {
def apply(tag: Bits, coh: ClientMetadata) = {
val meta = new L1Metadata
meta.tag := tag
meta.coh := coh
meta
}
}
class L1Metadata extends Metadata with L1HellaCacheParameters {
val coh = co.clientMetadataOnFlush.clone
}
class InternalProbe extends Probe with HasClientTransactionId
class WritebackReq extends L1HellaCacheBundle {
val tag = Bits(width = tagBits)
val idx = Bits(width = idxBits)
val way_en = Bits(width = nWays)
val client_xact_id = Bits(width = params(TLClientXactIdBits))
val r_type = UInt(width = co.releaseTypeWidth)
}
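// Miss Status Holding Register: tracks one outstanding miss through
// writeback, refill, and metadata update, then replays the requests
// queued against it (rpq) once the line has been refilled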
class MSHR(id: Int) extends L1HellaCacheModule {
val io = new Bundle {
val req_pri_val = Bool(INPUT)
val req_pri_rdy = Bool(OUTPUT)
val req_sec_val = Bool(INPUT)
val req_sec_rdy = Bool(OUTPUT)
val req_bits = new MSHRReqInternal().asInput
val idx_match = Bool(OUTPUT)
val tag = Bits(OUTPUT, tagBits)
val mem_req = Decoupled(new Acquire)
val mem_resp = new DataWriteReq().asOutput
val meta_read = Decoupled(new L1MetaReadReq)
val meta_write = Decoupled(new L1MetaWriteReq)
val replay = Decoupled(new ReplayInternal)
val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip
val mem_finish = Decoupled(new LogicalNetworkIO(new Finish))
val wb_req = Decoupled(new WritebackReq)
val probe_rdy = Bool(OUTPUT)
}
val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(UInt(), 9)
val state = Reg(init=s_invalid)
val acquire_type = Reg(UInt())
val release_type = Reg(UInt())
val line_state = Reg(new ClientMetadata()(co))
val req = Reg(new MSHRReqInternal())
val req_cmd = io.req_bits.cmd
val req_idx = req.addr(untagBits-1,blockOffBits)
val idx_match = req_idx === io.req_bits.addr(untagBits-1,blockOffBits)
val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits))
val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UInt(id)
val (refill_cnt, refill_done) = Counter(reply && co.messageUpdatesDataArray(io.mem_grant.bits.payload), refillCycles) // TODO: Zero width?
val wb_done = reply && (state === s_wb_resp)
val meta_on_flush = co.clientMetadataOnFlush
val meta_on_grant = co.clientMetadataOnGrant(io.mem_grant.bits.payload, io.mem_req.bits)
val meta_on_hit = co.clientMetadataOnHit(req_cmd, io.req_bits.old_meta.coh)
val rpq = Module(new Queue(new ReplayInternal, params(ReplayQueueDepth)))
rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(req_cmd)
rpq.io.enq.bits := io.req_bits
rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid
when (state === s_drain_rpq && !rpq.io.deq.valid) {
state := s_invalid
}
when (state === s_meta_write_resp) {
// this wait state allows us to catch RAW hazards on the tags via nack_victim
state := s_drain_rpq
}
when (state === s_meta_write_req && io.meta_write.ready) {
state := s_meta_write_resp
}
when (state === s_refill_resp) {
when (refill_done) { state := s_meta_write_req }
when (reply) {
if(refillCycles > 1) refill_cnt := refill_cnt + UInt(1)
line_state := meta_on_grant
}
}
when (io.mem_req.fire()) { // s_refill_req
state := s_refill_resp
}
when (state === s_meta_clear && io.meta_write.ready) {
state := s_refill_req
}
when (state === s_wb_resp && reply) {
state := s_meta_clear
}
when (io.wb_req.fire()) { // s_wb_req
state := s_wb_resp
}
when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req
acquire_type := co.getAcquireTypeOnSecondaryMiss(req_cmd, meta_on_flush, io.mem_req.bits)
}
when (io.req_pri_val && io.req_pri_rdy) {
line_state := meta_on_flush
refill_cnt := UInt(0)
acquire_type := co.getAcquireTypeOnPrimaryMiss(req_cmd, meta_on_flush)
release_type := co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc
req := io.req_bits
when (io.req_bits.tag_match) {
when (co.isHit(req_cmd, io.req_bits.old_meta.coh)) { // set dirty bit
state := s_meta_write_req
line_state := meta_on_hit
}.otherwise { // upgrade permissions
state := s_refill_req
}
}.otherwise { // writeback if necessary and refill
state := Mux(co.needsWriteback(io.req_bits.old_meta.coh), s_wb_req, s_meta_clear)
}
}
val ackq = Module(new Queue(new LogicalNetworkIO(new Finish), 1))
ackq.io.enq.valid := (wb_done || refill_done) && co.requiresAckForGrant(io.mem_grant.bits.payload)
ackq.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id
ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src
val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp
io.mem_finish.valid := ackq.io.deq.valid && can_finish
ackq.io.deq.ready := io.mem_finish.ready && can_finish
io.mem_finish.bits := ackq.io.deq.bits
io.idx_match := (state != s_invalid) && idx_match
io.mem_resp := req
io.mem_resp.addr := (if(refillCycles > 1) Cat(req_idx, refill_cnt) else req_idx) << rowOffBits
io.tag := req.addr >> untagBits
io.req_pri_rdy := state === s_invalid
io.req_sec_rdy := sec_rdy && rpq.io.enq.ready
val meta_hazard = Reg(init=UInt(0,2))
when (meta_hazard != UInt(0)) { meta_hazard := meta_hazard + 1 }
when (io.meta_write.fire()) { meta_hazard := 1 }
io.probe_rdy := !idx_match || (state != s_wb_req && state != s_wb_resp && state != s_meta_clear && meta_hazard === 0)
io.meta_write.valid := state === s_meta_write_req || state === s_meta_clear
io.meta_write.bits.idx := req_idx
io.meta_write.bits.data.coh := Mux(state === s_meta_clear, meta_on_flush, line_state)
io.meta_write.bits.data.tag := io.tag
io.meta_write.bits.way_en := req.way_en
io.wb_req.valid := state === s_wb_req && ackq.io.enq.ready
io.wb_req.bits.tag := req.old_meta.tag
io.wb_req.bits.idx := req_idx
io.wb_req.bits.way_en := req.way_en
io.wb_req.bits.client_xact_id := Bits(id)
io.wb_req.bits.r_type := co.getReleaseTypeOnVoluntaryWriteback()
io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready
io.mem_req.bits := Acquire(acquire_type, Cat(io.tag, req_idx).toUInt, Bits(id))
io.mem_finish <> ackq.io.deq
io.meta_read.valid := state === s_drain_rpq
io.meta_read.bits.idx := req_idx
io.meta_read.bits.tag := io.tag
io.replay.valid := state === s_drain_rpq && rpq.io.deq.valid
io.replay.bits := rpq.io.deq.bits
io.replay.bits.phys := Bool(true)
io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(blockOffBits-1,0)).toUInt
when (!io.meta_read.ready) {
rpq.io.deq.ready := Bool(false)
io.replay.bits.cmd := M_NOP
}
}
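// bank of NMSHRs MSHRs plus the store data queue (sdq); arbitrates their
// metadata, memory, writeback, and replay ports and routes refill data to
// the MSHR that owns the transaction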
class MSHRFile extends L1HellaCacheModule {
val io = new Bundle {
val req = Decoupled(new MSHRReq).flip
val secondary_miss = Bool(OUTPUT)
val mem_req = Decoupled(new Acquire)
val mem_resp = new DataWriteReq().asOutput
val meta_read = Decoupled(new L1MetaReadReq)
val meta_write = Decoupled(new L1MetaWriteReq)
val replay = Decoupled(new Replay)
val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip
val mem_finish = Decoupled(new LogicalNetworkIO(new Finish))
val wb_req = Decoupled(new WritebackReq)
val probe_rdy = Bool(OUTPUT)
val fence_rdy = Bool(OUTPUT)
}
val sdq_val = Reg(init=Bits(0, params(StoreDataQueueDepth)))
val sdq_alloc_id = PriorityEncoder(~sdq_val(params(StoreDataQueueDepth)-1,0))
val sdq_rdy = !sdq_val.andR
val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd)
val sdq = Mem(io.req.bits.data, params(StoreDataQueueDepth))
when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data }
val idxMatch = Vec.fill(params(NMSHRs)){Bool()}
val tagList = Vec.fill(params(NMSHRs)){Bits()}
val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits
val wbTagList = Vec.fill(params(NMSHRs)){Bits()}
val memRespMux = Vec.fill(params(NMSHRs)){new DataWriteReq}
val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, params(NMSHRs)))
val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, params(NMSHRs)))
val mem_req_arb = Module(new LockingArbiter(new Acquire, params(NMSHRs), outerDataBeats, co.messageHasData _))
val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), params(NMSHRs)))
val wb_req_arb = Module(new Arbiter(new WritebackReq, params(NMSHRs)))
val replay_arb = Module(new Arbiter(new ReplayInternal, params(NMSHRs)))
val alloc_arb = Module(new Arbiter(Bool(), params(NMSHRs)))
var idx_match = Bool(false)
var pri_rdy = Bool(false)
var sec_rdy = Bool(false)
io.fence_rdy := true
io.probe_rdy := true
for (i <- 0 until params(NMSHRs)) {
val mshr = Module(new MSHR(i))
idxMatch(i) := mshr.io.idx_match
tagList(i) := mshr.io.tag
wbTagList(i) := mshr.io.wb_req.bits.tag
alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy
mshr.io.req_pri_val := alloc_arb.io.in(i).ready
mshr.io.req_sec_val := io.req.valid && sdq_rdy && tag_match
mshr.io.req_bits := io.req.bits
mshr.io.req_bits.sdq_id := sdq_alloc_id
mshr.io.meta_read <> meta_read_arb.io.in(i)
mshr.io.meta_write <> meta_write_arb.io.in(i)
mshr.io.mem_req <> mem_req_arb.io.in(i)
mshr.io.mem_finish <> mem_finish_arb.io.in(i)
mshr.io.wb_req <> wb_req_arb.io.in(i)
mshr.io.replay <> replay_arb.io.in(i)
mshr.io.mem_grant <> io.mem_grant
memRespMux(i) := mshr.io.mem_resp
pri_rdy = pri_rdy || mshr.io.req_pri_rdy
sec_rdy = sec_rdy || mshr.io.req_sec_rdy
idx_match = idx_match || mshr.io.idx_match
when (!mshr.io.req_pri_rdy) { io.fence_rdy := false }
when (!mshr.io.probe_rdy) { io.probe_rdy := false }
}
alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match
meta_read_arb.io.out <> io.meta_read
meta_write_arb.io.out <> io.meta_write
mem_req_arb.io.out <> io.mem_req
mem_finish_arb.io.out <> io.mem_finish
wb_req_arb.io.out <> io.wb_req
io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy
io.secondary_miss := idx_match
io.mem_resp := memRespMux(io.mem_grant.bits.payload.client_xact_id)
val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd)
io.replay.bits.data := sdq(RegEnable(replay_arb.io.out.bits.sdq_id, free_sdq))
io.replay <> replay_arb.io.out
when (io.replay.valid || sdq_enq) {
sdq_val := sdq_val & ~(UIntToOH(replay_arb.io.out.bits.sdq_id) & Fill(params(StoreDataQueueDepth), free_sdq)) |
PriorityEncoderOH(~sdq_val(params(StoreDataQueueDepth)-1,0)) & Fill(params(StoreDataQueueDepth), sdq_enq)
}
}
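// streams a victim line out of the data array, one row per cycle, as the
// data beats of an outgoing Release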
class WritebackUnit extends L1HellaCacheModule {
val io = new Bundle {
val req = Decoupled(new WritebackReq()).flip
val meta_read = Decoupled(new L1MetaReadReq)
val data_req = Decoupled(new DataReadReq())
val data_resp = Bits(INPUT, encRowBits)
val release = Decoupled(new Release)
}
val active = Reg(init=Bool(false))
val r1_data_req_fired = Reg(init=Bool(false))
val r2_data_req_fired = Reg(init=Bool(false))
val cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1))) //TODO Zero width
val buf_v = (if(refillCyclesPerBeat > 1) Reg(init=Bits(0, width = refillCyclesPerBeat-1)) else Bits(1))
val beat_done = buf_v.andR
val req = Reg(new WritebackReq)
io.release.valid := false
when (active) {
r1_data_req_fired := false
r2_data_req_fired := r1_data_req_fired
when (io.data_req.fire() && io.meta_read.fire()) {
r1_data_req_fired := true
cnt := cnt + 1
}
when (r2_data_req_fired) {
io.release.valid := beat_done
when(!io.release.ready) {
r1_data_req_fired := false
r2_data_req_fired := false
cnt := cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1)
} .elsewhen(beat_done) { if(refillCyclesPerBeat > 1) buf_v := 0 }
when(!r1_data_req_fired) {
active := cnt < UInt(refillCycles)
}
}
}
when (io.req.fire()) {
active := true
cnt := 0
if(refillCyclesPerBeat > 1) buf_v := 0
req := io.req.bits
}
val fire = active && cnt < UInt(refillCycles)
io.req.ready := !active
// We reissue the meta read as it sets up the muxing for s2_data_muxed
io.meta_read.valid := fire
io.meta_read.bits.idx := req.idx
io.meta_read.bits.tag := req.tag
io.data_req.valid := fire
io.data_req.bits.way_en := req.way_en
io.data_req.bits.addr := (if(refillCycles > 1) Cat(req.idx, cnt(log2Up(refillCycles)-1,0))
else req.idx) << rowOffBits
io.release.bits.r_type := req.r_type
io.release.bits.addr := Cat(req.tag, req.idx).toUInt
io.release.bits.client_xact_id := req.client_xact_id
io.release.bits.data :=
(if(refillCyclesPerBeat > 1) {
val data_buf = Reg(Bits())
when(active && r2_data_req_fired && !beat_done) {
data_buf := Cat(io.data_resp, data_buf((refillCyclesPerBeat-1)*encRowBits-1, encRowBits))
buf_v := (if(refillCyclesPerBeat > 2)
Cat(UInt(1), buf_v(refillCyclesPerBeat-2,1))
else UInt(1))
}
Cat(io.data_resp, data_buf)
} else { io.data_resp })
}
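// services incoming probes: reads the tag, waits until the MSHRs are quiet
// for that index, issues a Release (or hands the line to the writeback unit),
// and updates the coherence metadata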
class ProbeUnit extends L1HellaCacheModule {
val io = new Bundle {
val req = Decoupled(new InternalProbe).flip
val rep = Decoupled(new Release)
val meta_read = Decoupled(new L1MetaReadReq)
val meta_write = Decoupled(new L1MetaWriteReq)
val wb_req = Decoupled(new WritebackReq)
val way_en = Bits(INPUT, nWays)
val mshr_rdy = Bool(INPUT)
val line_state = new ClientMetadata()(co).asInput
}
val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(UInt(), 9)
val state = Reg(init=s_invalid)
val line_state = Reg(co.clientMetadataOnFlush.clone)
val way_en = Reg(Bits())
val req = Reg(new InternalProbe)
val hit = way_en.orR
when (state === s_meta_write && io.meta_write.ready) {
state := s_invalid
}
when (state === s_writeback_resp && io.wb_req.ready) {
state := s_meta_write
}
when (state === s_writeback_req && io.wb_req.ready) {
state := s_writeback_resp
}
when (state === s_release && io.rep.ready) {
state := s_invalid
when (hit) {
state := Mux(co.needsWriteback(line_state), s_writeback_req, s_meta_write)
}
}
when (state === s_mshr_req) {
state := s_release
line_state := io.line_state
way_en := io.way_en
when (!io.mshr_rdy) { state := s_meta_read }
}
when (state === s_meta_resp) {
state := s_mshr_req
}
when (state === s_meta_read && io.meta_read.ready) {
state := s_meta_resp
}
when (state === s_invalid && io.req.valid) {
state := s_meta_read
req := io.req.bits
}
when (state === s_reset) {
state := s_invalid
}
io.req.ready := state === s_invalid
io.rep.valid := state === s_release &&
!(hit && co.needsWriteback(line_state)) // Otherwise WBU will issue release
io.rep.bits := Release(co.getReleaseTypeOnProbe(req,
Mux(hit, line_state, co.clientMetadataOnFlush)),
req.addr,
req.client_xact_id)
io.meta_read.valid := state === s_meta_read
io.meta_read.bits.idx := req.addr
io.meta_read.bits.tag := req.addr >> idxBits
io.meta_write.valid := state === s_meta_write
io.meta_write.bits.way_en := way_en
io.meta_write.bits.idx := req.addr
io.meta_write.bits.data.coh := co.clientMetadataOnProbe(req, line_state)
io.meta_write.bits.data.tag := req.addr >> UInt(idxBits)
io.wb_req.valid := state === s_writeback_req
io.wb_req.bits.way_en := way_en
io.wb_req.bits.idx := req.addr
io.wb_req.bits.tag := req.addr >> UInt(idxBits)
io.wb_req.bits.r_type := co.getReleaseTypeOnProbe(req, Mux(hit, line_state, co.clientMetadataOnFlush))
io.wb_req.bits.client_xact_id := req.client_xact_id
}
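// per-way data SRAMs with masked writes; doNarrowRead selects an organization
// that reads out roughly one word per way per access instead of a full row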
class DataArray extends L1HellaCacheModule {
val io = new Bundle {
val read = Decoupled(new DataReadReq).flip
val write = Decoupled(new DataWriteReq).flip
val resp = Vec.fill(nWays){Bits(OUTPUT, encRowBits)}
}
val waddr = io.write.bits.addr >> rowOffBits
val raddr = io.read.bits.addr >> rowOffBits
if (doNarrowRead) {
for (w <- 0 until nWays by rowWords) {
val wway_en = io.write.bits.way_en(w+rowWords-1,w)
val rway_en = io.read.bits.way_en(w+rowWords-1,w)
val resp = Vec.fill(rowWords){Bits(width = encRowBits)}
val r_raddr = RegEnable(io.read.bits.addr, io.read.valid)
for (p <- 0 until resp.size) {
val array = Mem(Bits(width=encRowBits), nSets*refillCycles, seqRead = true)
when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) {
val data = Fill(rowWords, io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p))
val mask = FillInterleaved(encDataBits, wway_en)
array.write(waddr, data, mask)
}
resp(p) := array(RegEnable(raddr, rway_en.orR && io.read.valid))
}
for (dw <- 0 until rowWords) {
val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw)))
val resp_mux =
if (r.size == 1) r
else Vec(r(r_raddr(rowOffBits-1,wordOffBits)), r.tail:_*)
io.resp(w+dw) := resp_mux.toBits
}
}
} else {
val wmask = FillInterleaved(encDataBits, io.write.bits.wmask)
for (w <- 0 until nWays) {
val array = Mem(Bits(width=encRowBits), nSets*refillCycles, seqRead = true)
when (io.write.bits.way_en(w) && io.write.valid) {
array.write(waddr, io.write.bits.data, wmask)
}
io.resp(w) := array(RegEnable(raddr, io.read.bits.way_en(w) && io.read.valid))
}
}
io.read.ready := Bool(true)
io.write.ready := Bool(true)
}
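// computes atomic memory operation results (add/and/or/xor/min/max) on 32- or
// 64-bit operands and merges them into the old data under the store mask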
class AMOALU extends L1HellaCacheModule {
val io = new Bundle {
val addr = Bits(INPUT, blockOffBits)
val cmd = Bits(INPUT, 4)
val typ = Bits(INPUT, 3)
val lhs = Bits(INPUT, coreDataBits)
val rhs = Bits(INPUT, coreDataBits)
val out = Bits(OUTPUT, coreDataBits)
}
require(coreDataBits == 64)
val storegen = new StoreGen(io.typ, io.addr, io.rhs)
val rhs = storegen.wordData
val sgned = io.cmd === M_XA_MIN || io.cmd === M_XA_MAX
val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU
val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU
val word = io.typ === MT_W || io.typ === MT_WU || io.typ === MT_B || io.typ === MT_BU
val mask = SInt(-1,64) ^ (io.addr(2) << 31)
val adder_out = (io.lhs & mask).toUInt + (rhs & mask)
val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63))
val cmp_rhs = Mux(word && !io.addr(2), rhs(31), rhs(63))
val lt_lo = io.lhs(31,0) < rhs(31,0)
val lt_hi = io.lhs(63,32) < rhs(63,32)
val eq_hi = io.lhs(63,32) === rhs(63,32)
val lt = Mux(word, Mux(io.addr(2), lt_hi, lt_lo), lt_hi || eq_hi && lt_lo)
val less = Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs))
val out = Mux(io.cmd === M_XA_ADD, adder_out,
Mux(io.cmd === M_XA_AND, io.lhs & rhs,
Mux(io.cmd === M_XA_OR, io.lhs | rhs,
Mux(io.cmd === M_XA_XOR, io.lhs ^ rhs,
Mux(Mux(less, min, max), io.lhs,
storegen.data)))))
val wmask = FillInterleaved(8, storegen.mask)
io.out := wmask & out | ~wmask & io.lhs
}
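// the non-blocking L1 data cache: stage 1 does the TLB lookup and tag/data
// reads, stage 2 resolves hits, misses, and nacks, and stage 3 writes
// store/AMO results back into the data array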
class HellaCache extends L1HellaCacheModule {
val io = new Bundle {
val cpu = (new HellaCacheIO).flip
val mem = new TileLinkIO
}
require(params(LRSCCycles) >= 32) // ISA requires 16-insn LRSC sequences to succeed
require(isPow2(nSets))
require(isPow2(nWays)) // TODO: relax this
require(params(RowBits) <= params(TLDataBits))
require(paddrBits-blockOffBits == params(TLAddrBits))
require(untagBits <= pgIdxBits)
val wb = Module(new WritebackUnit)
val prober = Module(new ProbeUnit)
val mshrs = Module(new MSHRFile)
io.cpu.req.ready := Bool(true)
val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
val s1_req = Reg(io.cpu.req.bits.clone)
val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill
val s1_replay = Reg(init=Bool(false))
val s1_clk_en = Reg(Bool())
val s2_valid = Reg(next=s1_valid_masked, init=Bool(false))
val s2_req = Reg(io.cpu.req.bits.clone)
val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd != M_NOP
val s2_recycle = Bool()
val s2_valid_masked = Bool()
val s3_valid = Reg(init=Bool(false))
val s3_req = Reg(io.cpu.req.bits.clone)
val s3_way = Reg(Bits())
val s1_recycled = RegEnable(s2_recycle, s1_clk_en)
val s1_read = isRead(s1_req.cmd)
val s1_write = isWrite(s1_req.cmd)
val s1_sc = s1_req.cmd === M_XSC
val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd)
val dtlb = Module(new TLB(params(NDTLBEntries)))
dtlb.io.ptw <> io.cpu.ptw
dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys
dtlb.io.req.bits.passthrough := s1_req.phys
dtlb.io.req.bits.asid := UInt(0)
dtlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits
dtlb.io.req.bits.instruction := Bool(false)
when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) }
when (io.cpu.req.valid) {
s1_req := io.cpu.req.bits
}
when (wb.io.meta_read.valid) {
s1_req.addr := Cat(wb.io.meta_read.bits.tag, wb.io.meta_read.bits.idx) << blockOffBits
s1_req.phys := Bool(true)
}
when (prober.io.meta_read.valid) {
s1_req.addr := Cat(prober.io.meta_read.bits.tag, prober.io.meta_read.bits.idx) << blockOffBits
s1_req.phys := Bool(true)
}
when (mshrs.io.replay.valid) {
s1_req := mshrs.io.replay.bits
}
when (s2_recycle) {
s1_req := s2_req
}
val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0))
when (s1_clk_en) {
s2_req.kill := s1_req.kill
s2_req.typ := s1_req.typ
s2_req.phys := s1_req.phys
s2_req.addr := s1_addr
when (s1_write) {
s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.req.bits.data)
}
when (s1_recycled) { s2_req.data := s1_req.data }
s2_req.tag := s1_req.tag
s2_req.cmd := s1_req.cmd
}
val misaligned =
(((s1_req.typ === MT_H) || (s1_req.typ === MT_HU)) && (s1_req.addr(0) != Bits(0))) ||
(((s1_req.typ === MT_W) || (s1_req.typ === MT_WU)) && (s1_req.addr(1,0) != Bits(0))) ||
((s1_req.typ === MT_D) && (s1_req.addr(2,0) != Bits(0)))
io.cpu.xcpt.ma.ld := s1_read && misaligned
io.cpu.xcpt.ma.st := s1_write && misaligned
io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld
io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st
// tags
def onReset = L1Metadata(UInt(0), ClientMetadata(UInt(0))(co))
val meta = Module(new MetadataArray(onReset _))
val metaReadArb = Module(new Arbiter(new MetaReadReq, 5))
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2))
metaReadArb.io.out <> meta.io.read
metaWriteArb.io.out <> meta.io.write
// data
val data = Module(new DataArray)
val readArb = Module(new Arbiter(new DataReadReq, 4))
val writeArb = Module(new Arbiter(new DataWriteReq, 2))
data.io.write.valid := writeArb.io.out.valid
writeArb.io.out.ready := data.io.write.ready
data.io.write.bits := writeArb.io.out.bits
val wdata_encoded = (0 until rowWords).map(i => code.encode(writeArb.io.out.bits.data(coreDataBits*(i+1)-1,coreDataBits*i)))
data.io.write.bits.data := Vec(wdata_encoded).toBits
// tag read for new requests
metaReadArb.io.in(4).valid := io.cpu.req.valid
metaReadArb.io.in(4).bits.idx := io.cpu.req.bits.addr >> blockOffBits
when (!metaReadArb.io.in(4).ready) { io.cpu.req.ready := Bool(false) }
// data read for new requests
readArb.io.in(3).valid := io.cpu.req.valid
readArb.io.in(3).bits.addr := io.cpu.req.bits.addr
readArb.io.in(3).bits.way_en := SInt(-1)
when (!readArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) }
// recycled requests
metaReadArb.io.in(0).valid := s2_recycle
metaReadArb.io.in(0).bits.idx := s2_req.addr >> blockOffBits
readArb.io.in(0).valid := s2_recycle
readArb.io.in(0).bits.addr := s2_req.addr
readArb.io.in(0).bits.way_en := SInt(-1)
// tag check and way muxing
def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f))
val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> untagBits)).toBits
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && co.isValid(meta.io.resp(w).coh)).toBits
s1_clk_en := metaReadArb.io.out.valid //TODO: should be metaReadArb.io.out.fire(), but triggers Verilog backend bug
val s1_writeback = s1_clk_en && !s1_valid && !s1_replay
val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en)
val s2_tag_match = s2_tag_match_way.orR
val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en)))
val s2_hit = s2_tag_match && co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === co.clientMetadataOnHit(s2_req.cmd, s2_hit_state)
// load-reserved/store-conditional
val lrsc_count = Reg(init=UInt(0))
val lrsc_valid = lrsc_count.orR
val lrsc_addr = Reg(UInt())
val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC)
val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> blockOffBits)
val s2_sc_fail = s2_sc && !s2_lrsc_addr_match
when (lrsc_valid) { lrsc_count := lrsc_count - 1 }
when (s2_valid_masked && s2_hit || s2_replay) {
when (s2_lr) {
when (!lrsc_valid) { lrsc_count := params(LRSCCycles)-1 }
lrsc_addr := s2_req.addr >> blockOffBits
}
when (s2_sc) {
lrsc_count := 0
}
}
when (io.cpu.ptw.sret) { lrsc_count := 0 }
val s2_data = Vec.fill(nWays){Bits(width = encRowBits)}
for (w <- 0 until nWays) {
val regs = Vec.fill(rowWords){Reg(Bits(width = encDataBits))}
val en1 = s1_clk_en && s1_tag_eq_way(w)
for (i <- 0 until regs.size) {
val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback)
when (en) { regs(i) := data.io.resp(w) >> encDataBits*i }
}
s2_data(w) := regs.toBits
}
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
val s2_data_decoded = (0 until rowWords).map(i => code.decode(s2_data_muxed(encDataBits*(i+1)-1,encDataBits*i)))
val s2_data_corrected = Vec(s2_data_decoded.map(_.corrected)).toBits
val s2_data_uncorrected = Vec(s2_data_decoded.map(_.uncorrected)).toBits
val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,3)
val s2_data_correctable = Vec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx)
// store/amo hits
s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && isWrite(s2_req.cmd)
val amoalu = Module(new AMOALU)
when ((s2_valid || s2_replay) && (isWrite(s2_req.cmd) || s2_data_correctable)) {
s3_req := s2_req
s3_req.data := Mux(s2_data_correctable, s2_data_corrected, amoalu.io.out)
s3_way := s2_tag_match_way
}
writeArb.io.in(0).bits.addr := s3_req.addr
writeArb.io.in(0).bits.wmask := UInt(1) << (if(rowOffBits > offsetlsb)
s3_req.addr(rowOffBits-1,offsetlsb).toUInt
else UInt(0))
writeArb.io.in(0).bits.data := Fill(rowWords, s3_req.data)
writeArb.io.in(0).valid := s3_valid
writeArb.io.in(0).bits.way_en := s3_way
// replacement policy
val replacer = params(Replacer)()
val s1_replaced_way_en = UIntToOH(replacer.way)
val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en))
val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq)
// miss handling
mshrs.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd))
mshrs.io.req.bits := s2_req
mshrs.io.req.bits.tag_match := s2_tag_match
mshrs.io.req.bits.old_meta := Mux(s2_tag_match, L1Metadata(s2_repl_meta.tag, s2_hit_state), s2_repl_meta)
mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en)
mshrs.io.req.bits.data := s2_req.data
when (mshrs.io.req.fire()) { replacer.miss }
io.mem.acquire <> DecoupledLogicalNetworkIOWrapper(mshrs.io.mem_req)
// replays
readArb.io.in(1).valid := mshrs.io.replay.valid
readArb.io.in(1).bits := mshrs.io.replay.bits
readArb.io.in(1).bits.way_en := SInt(-1)
mshrs.io.replay.ready := readArb.io.in(1).ready
s1_replay := mshrs.io.replay.valid && readArb.io.in(1).ready
metaReadArb.io.in(1) <> mshrs.io.meta_read
metaWriteArb.io.in(0) <> mshrs.io.meta_write
// probes and releases
val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, co.messageHasData _))
DecoupledLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release
val probe = DecoupledLogicalNetworkIOUnwrapper(io.mem.probe)
prober.io.req.valid := probe.valid && !lrsc_valid
probe.ready := prober.io.req.ready && !lrsc_valid
prober.io.req.bits := probe.bits
prober.io.rep <> releaseArb.io.in(1)
prober.io.way_en := s2_tag_match_way
prober.io.line_state := s2_hit_state
prober.io.meta_read <> metaReadArb.io.in(2)
prober.io.meta_write <> metaWriteArb.io.in(1)
prober.io.mshr_rdy := mshrs.io.probe_rdy
// refills
def doRefill(g: Grant): Bool = co.messageUpdatesDataArray(g)
val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCyclesPerBeat, doRefill))
ser.io.in <> io.mem.grant
val refill = ser.io.out
mshrs.io.mem_grant.valid := refill.fire()
mshrs.io.mem_grant.bits := refill.bits
refill.ready := writeArb.io.in(1).ready || !doRefill(refill.bits.payload)
writeArb.io.in(1).valid := refill.valid && doRefill(refill.bits.payload)
writeArb.io.in(1).bits := mshrs.io.mem_resp
writeArb.io.in(1).bits.wmask := SInt(-1)
writeArb.io.in(1).bits.data := refill.bits.payload.data(encRowBits-1,0)
readArb.io.out.ready := !refill.valid || refill.ready // insert bubble if refill gets blocked
readArb.io.out <> data.io.read
// writebacks
val wbArb = Module(new Arbiter(new WritebackReq, 2))
prober.io.wb_req <> wbArb.io.in(0)
mshrs.io.wb_req <> wbArb.io.in(1)
wbArb.io.out <> wb.io.req
wb.io.meta_read <> metaReadArb.io.in(3)
wb.io.data_req <> readArb.io.in(2)
wb.io.data_resp := s2_data_corrected
releaseArb.io.in(0) <> wb.io.release
// store->load bypassing
val s4_valid = Reg(next=s3_valid, init=Bool(false))
val s4_req = RegEnable(s3_req, s3_valid && metaReadArb.io.out.valid)
val bypasses = List(
((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out),
(s3_valid, s3_req, s3_req.data),
(s4_valid, s4_req, s4_req.data)
).map(r => (r._1 && (s1_addr >> wordOffBits === r._2.addr >> wordOffBits) && isWrite(r._2.cmd), r._3))
val s2_store_bypass_data = Reg(Bits(width = coreDataBits))
val s2_store_bypass = Reg(Bool())
when (s1_clk_en) {
s2_store_bypass := false
when (bypasses.map(_._1).reduce(_||_)) {
s2_store_bypass_data := PriorityMux(bypasses)
s2_store_bypass := true
}
}
// load data subword mux/sign extension
val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits)))
val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass)
val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc)
amoalu.io := s2_req
amoalu.io.lhs := s2_data_word
amoalu.io.rhs := s2_req.data
// nack it like it's hot
val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss ||
s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready
val s2_nack_hit = RegEnable(s1_nack, s1_valid || s1_replay)
when (s2_nack_hit) { mshrs.io.req.valid := Bool(false) }
val s2_nack_victim = s2_hit && mshrs.io.secondary_miss
val s2_nack_miss = !s2_hit && !mshrs.io.req.ready
val s2_nack = s2_nack_hit || s2_nack_victim || s2_nack_miss
s2_valid_masked := s2_valid && !s2_nack
val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable
val s2_recycle_next = Reg(init=Bool(false))
when (s1_valid || s1_replay) { s2_recycle_next := (s1_valid || s1_replay) && s2_recycle_ecc }
s2_recycle := s2_recycle_ecc || s2_recycle_next
// after a nack, block until nack condition resolves to save energy
val block_miss = Reg(init=Bool(false))
block_miss := (s2_valid || block_miss) && s2_nack_miss
when (block_miss) {
io.cpu.req.ready := Bool(false)
}
io.cpu.resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
io.cpu.resp.bits.nack := s2_valid && s2_nack
io.cpu.resp.bits := s2_req
io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc
io.cpu.resp.bits.replay := s2_replay
io.cpu.resp.bits.data := loadgen.word
io.cpu.resp.bits.data_subword := loadgen.byte | s2_sc_fail
io.cpu.resp.bits.store_data := s2_req.data
io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
io.cpu.replay_next.valid := s1_replay && (s1_read || s1_sc)
io.cpu.replay_next.bits := s1_req.tag
io.mem.finish <> mshrs.io.mem_finish
}
// exposes a sane decoupled request interface
class SimpleHellaCacheIF extends Module
{
val io = new Bundle {
val requestor = new HellaCacheIO().flip
val cache = new HellaCacheIO
}
val replaying_cmb = Bool()
val replaying = Reg(next = replaying_cmb, init = Bool(false))
replaying_cmb := replaying
val replayq1 = Module(new Queue(new HellaCacheReq, 1, flow = true))
val replayq2 = Module(new Queue(new HellaCacheReq, 1))
val req_arb = Module(new Arbiter(new HellaCacheReq, 2))
req_arb.io.in(0) <> replayq1.io.deq
req_arb.io.in(1).valid := !replaying_cmb && io.requestor.req.valid
req_arb.io.in(1).bits := io.requestor.req.bits
io.requestor.req.ready := !replaying_cmb && req_arb.io.in(1).ready
val s2_nack = io.cache.resp.bits.nack
val s3_nack = Reg(next=s2_nack)
val s0_req_fire = io.cache.req.fire()
val s1_req_fire = Reg(next=s0_req_fire)
val s2_req_fire = Reg(next=s1_req_fire)
io.cache.req.bits.kill := s2_nack
io.cache.req.bits.phys := Bool(true)
io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
io.cache.req <> req_arb.io.out
// replay queues
// replayq1 holds the older request
// replayq2 holds the newer request (for the first nack)
// we need to split the queues like this for the case where the older request
// goes through but gets nacked, while the newer request stalls
// if this happens, the newer request will go through before the older
// request
// we don't need to check replayq1.io.enq.ready and replayq2.io.enq.ready as
// there will only be two requests going through at most
// stash d$ request in stage 2 if nacked (older request)
replayq1.io.enq.valid := Bool(false)
replayq1.io.enq.bits.cmd := io.cache.resp.bits.cmd
replayq1.io.enq.bits.typ := io.cache.resp.bits.typ
replayq1.io.enq.bits.addr := io.cache.resp.bits.addr
replayq1.io.enq.bits.data := io.cache.resp.bits.store_data
replayq1.io.enq.bits.tag := io.cache.resp.bits.tag
// stash d$ request in stage 1 if nacked (newer request)
replayq2.io.enq.valid := s2_req_fire && s3_nack
replayq2.io.enq.bits.data := io.cache.resp.bits.store_data
replayq2.io.enq.bits <> io.cache.resp.bits
replayq2.io.deq.ready := Bool(false)
when (s2_nack) {
replayq1.io.enq.valid := Bool(true)
replaying_cmb := Bool(true)
}
// when replaying request got sunk into the d$
when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !s2_nack) {
// see if there's a stashed request in replayq2
when (replayq2.io.deq.valid) {
replayq1.io.enq.valid := Bool(true)
replayq1.io.enq.bits.cmd := replayq2.io.deq.bits.cmd
replayq1.io.enq.bits.typ := replayq2.io.deq.bits.typ
replayq1.io.enq.bits.addr := replayq2.io.deq.bits.addr
replayq1.io.enq.bits.data := replayq2.io.deq.bits.data
replayq1.io.enq.bits.tag := replayq2.io.deq.bits.tag
replayq2.io.deq.ready := Bool(true)
} .otherwise {
replaying_cmb := Bool(false)
}
}
io.requestor.resp := io.cache.resp
}