Implement bypassing L1 data cache for MMIO
This commit is contained in:
parent
e72e5a34b5
commit
382faba4a6
@ -4,18 +4,23 @@ package rocket
|
|||||||
|
|
||||||
import Chisel._
|
import Chisel._
|
||||||
import uncore._
|
import uncore._
|
||||||
|
import junctions.MMIOBase
|
||||||
import Util._
|
import Util._
|
||||||
|
|
||||||
case object WordBits extends Field[Int]
|
case object WordBits extends Field[Int]
|
||||||
case object StoreDataQueueDepth extends Field[Int]
|
case object StoreDataQueueDepth extends Field[Int]
|
||||||
case object ReplayQueueDepth extends Field[Int]
|
case object ReplayQueueDepth extends Field[Int]
|
||||||
case object NMSHRs extends Field[Int]
|
case object NMSHRs extends Field[Int]
|
||||||
|
case object NIOMSHRs extends Field[Int]
|
||||||
case object LRSCCycles extends Field[Int]
|
case object LRSCCycles extends Field[Int]
|
||||||
|
|
||||||
abstract trait L1HellaCacheParameters extends L1CacheParameters {
|
abstract trait L1HellaCacheParameters extends L1CacheParameters {
|
||||||
val wordBits = params(WordBits)
|
val wordBits = params(WordBits)
|
||||||
val wordBytes = wordBits/8
|
val wordBytes = wordBits/8
|
||||||
val wordOffBits = log2Up(wordBytes)
|
val wordOffBits = log2Up(wordBytes)
|
||||||
|
val beatBytes = params(CacheBlockBytes) / params(TLDataBeats)
|
||||||
|
val beatWords = beatBytes / wordBytes
|
||||||
|
val beatOffBits = log2Up(beatBytes)
|
||||||
val idxMSB = untagBits-1
|
val idxMSB = untagBits-1
|
||||||
val idxLSB = blockOffBits
|
val idxLSB = blockOffBits
|
||||||
val offsetmsb = idxLSB-1
|
val offsetmsb = idxLSB-1
|
||||||
@ -26,6 +31,8 @@ abstract trait L1HellaCacheParameters extends L1CacheParameters {
|
|||||||
val encRowBits = encDataBits*rowWords
|
val encRowBits = encDataBits*rowWords
|
||||||
val sdqDepth = params(StoreDataQueueDepth)
|
val sdqDepth = params(StoreDataQueueDepth)
|
||||||
val nMSHRs = params(NMSHRs)
|
val nMSHRs = params(NMSHRs)
|
||||||
|
val nIOMSHRs = params(NIOMSHRs)
|
||||||
|
val mmioBase = params(MMIOBase)
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters
|
abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters
|
||||||
@ -130,6 +137,83 @@ class WritebackReq extends Release with CacheParameters {
|
|||||||
val way_en = Bits(width = nWays)
|
val way_en = Bits(width = nWays)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IOMSHR: a single-outstanding-request handler that turns one HellaCacheReq
// into one built-in Acquire (tagged with client_xact_id = id), waits for the
// matching Grant, and -- for reads only -- presents a HellaCacheResp.
// It is a four-state machine: s_idle -> s_acquire -> s_grant -> (s_resp ->) s_idle.
class IOMSHR(id: Int) extends L1HellaCacheModule {
|
||||||
|
val io = new Bundle {
|
||||||
|
// Incoming request; accepted only while the state machine is idle.
val req = Decoupled(new HellaCacheReq).flip
|
||||||
|
// Outgoing Acquire built from the latched request.
val acquire = Decoupled(new Acquire)
|
||||||
|
// Incoming Grant; only sampled while in s_grant.
val grant = Valid(new Grant).flip
|
||||||
|
// Response for read requests, held valid while in s_resp.
val resp = Decoupled(new HellaCacheResp)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Selects one word out of a beat of data: the word index is taken from
// addr(beatOffBits - 1, wordOffBits), and the beat is shifted right by that
// index with (wordOffBits + 3) zero bits appended, i.e. index << (wordOffBits + 3).
// The low wordBits of the shifted value are returned.
// NOTE(review): if beatOffBits equals wordOffBits the index bit range is
// empty -- confirm this is handled for such configurations.
def wordFromBeat(addr: UInt, dat: UInt) = {
|
||||||
|
val offset = addr(beatOffBits - 1, wordOffBits)
|
||||||
|
val shift = Cat(offset, UInt(0, wordOffBits + 3))
|
||||||
|
(dat >> shift)(wordBits - 1, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Request latched when io.req fires; it drives the Acquire and the response.
val req = Reg(new HellaCacheReq)
|
||||||
|
// Word captured from the Grant data for read requests.
val grant_word = Reg(UInt(width = wordBits))
|
||||||
|
|
||||||
|
// Store/load helpers built from the latched request; loadgen reads the
// captured grant word.
val storegen = new StoreGen(req.typ, req.addr, req.data)
|
||||||
|
val loadgen = new LoadGen(req.typ, req.addr, grant_word, Bool(false))
|
||||||
|
|
||||||
|
// Word index of the request within its beat.
val beat_offset = req.addr(beatOffBits - 1, wordOffBits)
|
||||||
|
// Store mask shifted left by beat_offset << wordOffBits so that it lines up
// with the addressed word inside the beat.
val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits)))
|
||||||
|
// Store data replicated beatWords times to fill the beat; beat_mask picks
// which copy is written.
val beat_data = Fill(beatWords, storegen.data)
|
||||||
|
|
||||||
|
// Low beatOffBits of the request address.
val addr_byte = req.addr(beatOffBits - 1, 0)
|
||||||
|
// Reads become a get Acquire, everything else a put Acquire.
val a_type = Mux(isRead(req.cmd), Acquire.getType, Acquire.putType)
|
||||||
|
// Union field contents: address/type/M_XRD for reads, the write mask otherwise.
val union = Mux(isRead(req.cmd),
|
||||||
|
Cat(addr_byte, req.typ, M_XRD), beat_mask)
|
||||||
|
|
||||||
|
val s_idle :: s_acquire :: s_grant :: s_resp :: Nil = Enum(Bits(), 4)
|
||||||
|
val state = Reg(init = s_idle)
|
||||||
|
|
||||||
|
// Only one request is handled at a time.
io.req.ready := (state === s_idle)
|
||||||
|
|
||||||
|
io.acquire.valid := (state === s_acquire)
|
||||||
|
io.acquire.bits := Acquire(
|
||||||
|
is_builtin_type = Bool(true),
|
||||||
|
a_type = a_type,
|
||||||
|
client_xact_id = UInt(id),
|
||||||
|
addr_block = req.addr(paddrBits - 1, blockOffBits),
|
||||||
|
addr_beat = req.addr(blockOffBits - 1, beatOffBits),
|
||||||
|
data = beat_data,
|
||||||
|
// alloc bit should always be false
|
||||||
|
union = Cat(union, Bool(false)))
|
||||||
|
|
||||||
|
// The response starts as a copy of the latched request; the fields below
// are then overridden.
io.resp.valid := (state === s_resp)
|
||||||
|
io.resp.bits := req
|
||||||
|
io.resp.bits.has_data := isRead(req.cmd)
|
||||||
|
io.resp.bits.data := loadgen.word
|
||||||
|
io.resp.bits.data_subword := loadgen.byte
|
||||||
|
io.resp.bits.store_data := req.data
|
||||||
|
io.resp.bits.nack := Bool(false)
|
||||||
|
// replay is asserted whenever a response is being presented.
io.resp.bits.replay := io.resp.valid
|
||||||
|
|
||||||
|
// s_idle -> s_acquire: latch the request.
when (io.req.fire()) {
|
||||||
|
req := io.req.bits
|
||||||
|
state := s_acquire
|
||||||
|
}
|
||||||
|
|
||||||
|
// s_acquire -> s_grant: the Acquire has been accepted.
when (io.acquire.fire()) {
|
||||||
|
state := s_grant
|
||||||
|
}
|
||||||
|
|
||||||
|
// s_grant: reads capture the addressed word and move on to s_resp; other
// commands return straight to s_idle, so no response is produced for them.
when (state === s_grant && io.grant.valid) {
|
||||||
|
when (isRead(req.cmd)) {
|
||||||
|
grant_word := wordFromBeat(req.addr, io.grant.bits.data)
|
||||||
|
state := s_resp
|
||||||
|
} .otherwise {
|
||||||
|
state := s_idle
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// s_resp -> s_idle: the response has been accepted.
when (io.resp.fire()) {
|
||||||
|
state := s_idle
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
class MSHR(id: Int) extends L1HellaCacheModule {
|
class MSHR(id: Int) extends L1HellaCacheModule {
|
||||||
val io = new Bundle {
|
val io = new Bundle {
|
||||||
val req_pri_val = Bool(INPUT)
|
val req_pri_val = Bool(INPUT)
|
||||||
@ -282,6 +366,7 @@ class MSHR(id: Int) extends L1HellaCacheModule {
|
|||||||
class MSHRFile extends L1HellaCacheModule {
|
class MSHRFile extends L1HellaCacheModule {
|
||||||
val io = new Bundle {
|
val io = new Bundle {
|
||||||
val req = Decoupled(new MSHRReq).flip
|
val req = Decoupled(new MSHRReq).flip
|
||||||
|
val resp = Decoupled(new HellaCacheResp)
|
||||||
val secondary_miss = Bool(OUTPUT)
|
val secondary_miss = Bool(OUTPUT)
|
||||||
|
|
||||||
val mem_req = Decoupled(new Acquire)
|
val mem_req = Decoupled(new Acquire)
|
||||||
@ -296,10 +381,13 @@ class MSHRFile extends L1HellaCacheModule {
|
|||||||
val fence_rdy = Bool(OUTPUT)
|
val fence_rdy = Bool(OUTPUT)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// determine if the request is in the memory region or mmio region
|
||||||
|
val cacheable = io.req.bits.addr < UInt(mmioBase)
|
||||||
|
|
||||||
val sdq_val = Reg(init=Bits(0, sdqDepth))
|
val sdq_val = Reg(init=Bits(0, sdqDepth))
|
||||||
val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0))
|
val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0))
|
||||||
val sdq_rdy = !sdq_val.andR
|
val sdq_rdy = !sdq_val.andR
|
||||||
val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd)
|
val sdq_enq = io.req.valid && io.req.ready && cacheable && isWrite(io.req.bits.cmd)
|
||||||
val sdq = Mem(io.req.bits.data, sdqDepth)
|
val sdq = Mem(io.req.bits.data, sdqDepth)
|
||||||
when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data }
|
when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data }
|
||||||
|
|
||||||
@ -313,7 +401,7 @@ class MSHRFile extends L1HellaCacheModule {
|
|||||||
val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs))
|
val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs))
|
||||||
val mem_req_arb = Module(new LockingArbiter(
|
val mem_req_arb = Module(new LockingArbiter(
|
||||||
new Acquire,
|
new Acquire,
|
||||||
nMSHRs,
|
nMSHRs + nIOMSHRs,
|
||||||
outerDataBeats,
|
outerDataBeats,
|
||||||
(a: Acquire) => a.hasMultibeatData()))
|
(a: Acquire) => a.hasMultibeatData()))
|
||||||
val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs))
|
val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs))
|
||||||
@ -360,14 +448,44 @@ class MSHRFile extends L1HellaCacheModule {
|
|||||||
when (!mshr.io.probe_rdy) { io.probe_rdy := false }
|
when (!mshr.io.probe_rdy) { io.probe_rdy := false }
|
||||||
}
|
}
|
||||||
|
|
||||||
alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match
|
alloc_arb.io.out.ready := io.req.valid && sdq_rdy && cacheable && !idx_match
|
||||||
|
|
||||||
io.meta_read <> meta_read_arb.io.out
|
io.meta_read <> meta_read_arb.io.out
|
||||||
io.meta_write <> meta_write_arb.io.out
|
io.meta_write <> meta_write_arb.io.out
|
||||||
io.mem_req <> mem_req_arb.io.out
|
io.mem_req <> mem_req_arb.io.out
|
||||||
io.wb_req <> wb_req_arb.io.out
|
io.wb_req <> wb_req_arb.io.out
|
||||||
|
|
||||||
io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy
|
val mmio_alloc_arb = Module(new Arbiter(Bool(), nIOMSHRs))
|
||||||
|
val resp_arb = Module(new Arbiter(new HellaCacheResp, nIOMSHRs))
|
||||||
|
|
||||||
|
var mmio_rdy = Bool(false)
|
||||||
|
|
||||||
|
for (i <- 0 until nIOMSHRs) {
|
||||||
|
val id = nMSHRs + i
|
||||||
|
val mshr = Module(new IOMSHR(id))
|
||||||
|
|
||||||
|
mmio_alloc_arb.io.in(i).valid := mshr.io.req.ready
|
||||||
|
mshr.io.req.valid := mmio_alloc_arb.io.in(i).ready
|
||||||
|
mshr.io.req.bits := io.req.bits
|
||||||
|
|
||||||
|
mmio_rdy = mmio_rdy || mshr.io.req.ready
|
||||||
|
|
||||||
|
mem_req_arb.io.in(id) <> mshr.io.acquire
|
||||||
|
|
||||||
|
mshr.io.grant.bits := io.mem_grant.bits
|
||||||
|
mshr.io.grant.valid := io.mem_grant.valid &&
|
||||||
|
io.mem_grant.bits.client_xact_id === UInt(id)
|
||||||
|
|
||||||
|
resp_arb.io.in(i) <> mshr.io.resp
|
||||||
|
|
||||||
|
when (!mshr.io.req.ready) { io.fence_rdy := Bool(false) }
|
||||||
|
}
|
||||||
|
|
||||||
|
mmio_alloc_arb.io.out.ready := io.req.valid && !cacheable
|
||||||
|
|
||||||
|
io.resp <> resp_arb.io.out
|
||||||
|
io.req.ready := Mux(!cacheable, mmio_rdy,
|
||||||
|
Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy)
|
||||||
io.secondary_miss := idx_match
|
io.secondary_miss := idx_match
|
||||||
io.refill := refillMux(io.mem_grant.bits.client_xact_id)
|
io.refill := refillMux(io.mem_grant.bits.client_xact_id)
|
||||||
|
|
||||||
@ -824,7 +942,11 @@ class HellaCache extends L1HellaCacheModule {
|
|||||||
mshrs.io.mem_grant.valid := narrow_grant.fire()
|
mshrs.io.mem_grant.valid := narrow_grant.fire()
|
||||||
mshrs.io.mem_grant.bits := narrow_grant.bits
|
mshrs.io.mem_grant.bits := narrow_grant.bits
|
||||||
narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData()
|
narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData()
|
||||||
writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData()
|
/* The last clause here is necessary in order to prevent the responses for
|
||||||
|
* the IOMSHRs from being written into the data array. It works because the
|
||||||
|
* IOMSHR ids start right after the ones for the regular MSHRs. */
|
||||||
|
writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() &&
|
||||||
|
narrow_grant.bits.client_xact_id < UInt(nMSHRs)
|
||||||
writeArb.io.in(1).bits.addr := mshrs.io.refill.addr
|
writeArb.io.in(1).bits.addr := mshrs.io.refill.addr
|
||||||
writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en
|
writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en
|
||||||
writeArb.io.in(1).bits.wmask := ~UInt(0, nWays)
|
writeArb.io.in(1).bits.wmask := ~UInt(0, nWays)
|
||||||
@ -893,16 +1015,25 @@ class HellaCache extends L1HellaCacheModule {
|
|||||||
io.cpu.req.ready := Bool(false)
|
io.cpu.req.ready := Bool(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
io.cpu.resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
|
val cache_resp = Wire(Valid(new HellaCacheResp))
|
||||||
io.cpu.resp.bits.nack := s2_valid && s2_nack
|
cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
|
||||||
io.cpu.resp.bits := s2_req
|
cache_resp.bits := s2_req
|
||||||
io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc
|
cache_resp.bits.has_data := isRead(s2_req.cmd) || s2_sc
|
||||||
io.cpu.resp.bits.replay := s2_replay
|
cache_resp.bits.data := loadgen.word
|
||||||
io.cpu.resp.bits.data := loadgen.word
|
cache_resp.bits.data_subword := loadgen.byte | s2_sc_fail
|
||||||
io.cpu.resp.bits.data_subword := loadgen.byte | s2_sc_fail
|
cache_resp.bits.store_data := s2_req.data
|
||||||
io.cpu.resp.bits.store_data := s2_req.data
|
cache_resp.bits.nack := s2_valid && s2_nack
|
||||||
io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
|
cache_resp.bits.replay := s2_replay
|
||||||
|
|
||||||
|
val uncache_resp = Wire(Valid(new HellaCacheResp))
|
||||||
|
uncache_resp.bits := mshrs.io.resp.bits
|
||||||
|
uncache_resp.valid := mshrs.io.resp.valid
|
||||||
|
|
||||||
|
val cache_pass = s2_valid || s2_replay
|
||||||
|
mshrs.io.resp.ready := !cache_pass
|
||||||
|
|
||||||
|
io.cpu.resp := Mux(cache_pass, cache_resp, uncache_resp)
|
||||||
|
io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
|
||||||
io.cpu.replay_next.valid := s1_replay && (s1_read || s1_sc)
|
io.cpu.replay_next.valid := s1_replay && (s1_read || s1_sc)
|
||||||
io.cpu.replay_next.bits := s1_req.tag
|
io.cpu.replay_next.bits := s1_req.tag
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user