Merge branch 'master' into pipeline-mmio
This commit is contained in:
@ -121,7 +121,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||
tlb.io.req.bits.sfence.valid := s1_sfence
|
||||
tlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0)
|
||||
tlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1)
|
||||
tlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data
|
||||
tlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data.data
|
||||
tlb.io.req.bits.passthrough := s1_req.phys
|
||||
tlb.io.req.bits.vaddr := s1_req.addr
|
||||
tlb.io.req.bits.instruction := false
|
||||
@ -155,6 +155,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||
}
|
||||
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
|
||||
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
|
||||
val s1_mask = Mux(s1_req.cmd === M_PWR, io.cpu.s1_data.mask, new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).mask)
|
||||
|
||||
val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false)) && !io.cpu.s2_xcpt.asUInt.orR
|
||||
val s2_probe = Reg(next=s1_probe, init=Bool(false))
|
||||
@ -229,10 +230,10 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||
val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_addr = RegEnable(s1_paddr, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_data = RegEnable(io.cpu.s1_data, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_data = RegEnable(io.cpu.s1_data.data, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_storegen = new StoreGen(pstore1_typ, pstore1_addr, pstore1_data, wordBytes)
|
||||
val pstore1_storegen_data = Wire(init = pstore1_storegen.data)
|
||||
val pstore1_mask = RegEnable(s1_mask, s1_valid_not_nacked && s1_write)
|
||||
val pstore1_storegen_data = Wire(init = pstore1_data)
|
||||
val pstore1_amo = Bool(usingAtomics) && isRead(pstore1_cmd)
|
||||
val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_amo)
|
||||
val pstore_drain_opportunistic = !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd))
|
||||
@ -252,21 +253,20 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||
val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1)
|
||||
val pstore2_way = RegEnable(pstore1_way, advance_pstore1)
|
||||
val pstore2_storegen_data = RegEnable(pstore1_storegen_data, advance_pstore1)
|
||||
val pstore2_storegen_mask = RegEnable(pstore1_storegen.mask, advance_pstore1)
|
||||
val pstore2_storegen_mask = RegEnable(pstore1_mask, advance_pstore1)
|
||||
dataArb.io.in(0).valid := pstore_drain
|
||||
dataArb.io.in(0).bits.write := true
|
||||
dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
|
||||
dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way)
|
||||
dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data))
|
||||
val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits
|
||||
dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_storegen.mask) << pstore_mask_shift
|
||||
dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_mask) << pstore_mask_shift
|
||||
|
||||
// store->load RAW hazard detection
|
||||
val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes)
|
||||
val s1_idx = s1_req.addr(idxMSB, wordOffBits)
|
||||
val s1_raw_hazard = s1_read &&
|
||||
((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_storegen.mask & s1_storegen.mask).orR) ||
|
||||
(pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx && (pstore2_storegen_mask & s1_storegen.mask).orR))
|
||||
((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_mask & s1_mask).orR) ||
|
||||
(pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx && (pstore2_storegen_mask & s1_mask).orR))
|
||||
when (s1_valid && s1_raw_hazard) { s1_nack := true }
|
||||
|
||||
metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty)
|
||||
@ -279,8 +279,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||
val a_source = PriorityEncoder(~uncachedInFlight.asUInt << mmioOffset) // skip the MSHR
|
||||
val acquire_address = s2_req_block_addr
|
||||
val access_address = s2_req.addr
|
||||
val a_size = s2_req.typ(MT_SZ-2, 0)
|
||||
val a_data = Fill(beatWords, pstore1_storegen.data)
|
||||
val a_size = mtSize(s2_req.typ)
|
||||
val a_data = Fill(beatWords, pstore1_data)
|
||||
val acquire = if (edge.manager.anySupportAcquireB) {
|
||||
edge.Acquire(UInt(0), acquire_address, lgCacheBlockBytes, s2_grow_param)._2 // Cacheability checked by tlb
|
||||
} else {
|
||||
@ -517,14 +517,14 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
|
||||
val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word, s2_sc, wordBytes)
|
||||
io.cpu.resp.bits.data := loadgen.data | s2_sc_fail
|
||||
io.cpu.resp.bits.data_word_bypass := loadgen.wordData
|
||||
io.cpu.resp.bits.data_raw := s2_data_word
|
||||
io.cpu.resp.bits.store_data := pstore1_data
|
||||
|
||||
// AMOs
|
||||
if (usingAtomics) {
|
||||
val amoalu = Module(new AMOALU(xLen))
|
||||
amoalu.io.addr := pstore1_addr
|
||||
amoalu.io.mask := pstore1_mask
|
||||
amoalu.io.cmd := pstore1_cmd
|
||||
amoalu.io.typ := pstore1_typ
|
||||
amoalu.io.lhs := s2_data_word
|
||||
amoalu.io.rhs := pstore1_data
|
||||
pstore1_storegen_data := amoalu.io.out
|
||||
|
@ -103,6 +103,7 @@ class HellaCacheResp(implicit p: Parameters) extends CoreBundle()(p)
|
||||
val replay = Bool()
|
||||
val has_data = Bool()
|
||||
val data_word_bypass = Bits(width = coreDataBits)
|
||||
val data_raw = Bits(width = coreDataBits)
|
||||
val store_data = Bits(width = coreDataBits)
|
||||
}
|
||||
|
||||
@ -117,11 +118,16 @@ class HellaCacheExceptions extends Bundle {
|
||||
val ae = new AlignmentExceptions
|
||||
}
|
||||
|
||||
class HellaCacheWriteData(implicit p: Parameters) extends CoreBundle()(p) {
|
||||
val data = UInt(width = coreDataBits)
|
||||
val mask = UInt(width = coreDataBytes)
|
||||
}
|
||||
|
||||
// interface between D$ and processor/DTLB
|
||||
class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
|
||||
val req = Decoupled(new HellaCacheReq)
|
||||
val s1_kill = Bool(OUTPUT) // kill previous cycle's req
|
||||
val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req
|
||||
val s1_data = new HellaCacheWriteData().asOutput // data for previous cycle's req
|
||||
val s2_nack = Bool(INPUT) // req from two cycles ago is rejected
|
||||
|
||||
// performance events
|
||||
|
@ -129,7 +129,7 @@ class IDecode(implicit val p: Parameters) extends DecodeConstants
|
||||
class SDecode(implicit val p: Parameters) extends DecodeConstants
|
||||
{
|
||||
val table: Array[(BitPat, List[BitPat])] = Array(
|
||||
SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_SFENCE, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N),
|
||||
SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_SFENCE, MT_W, N,N,N,N,N,N,CSR.N,N,N,N,N),
|
||||
SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N))
|
||||
}
|
||||
|
||||
|
@ -69,20 +69,18 @@ class IOMSHR(id: Int)(implicit edge: TLEdgeOut, p: Parameters) extends L1HellaCa
|
||||
}
|
||||
|
||||
val req = Reg(new HellaCacheReq)
|
||||
val req_cmd_sc = req.cmd === M_XSC
|
||||
val grant_word = Reg(UInt(width = wordBits))
|
||||
|
||||
val s_idle :: s_mem_access :: s_mem_ack :: s_resp :: Nil = Enum(Bits(), 4)
|
||||
val state = Reg(init = s_idle)
|
||||
io.req.ready := (state === s_idle)
|
||||
|
||||
val storegen = new StoreGen(req.typ, req.addr, req.data, wordBytes)
|
||||
val loadgen = new LoadGen(req.typ, mtSigned(req.typ), req.addr, grant_word, req_cmd_sc, wordBytes)
|
||||
val loadgen = new LoadGen(req.typ, mtSigned(req.typ), req.addr, grant_word, false.B, wordBytes)
|
||||
|
||||
val a_source = UInt(id)
|
||||
val a_address = req.addr
|
||||
val a_size = storegen.size
|
||||
val a_data = Fill(beatWords, storegen.data)
|
||||
val a_size = mtSize(req.typ)
|
||||
val a_data = Fill(beatWords, req.data)
|
||||
|
||||
val get = edge.Get(a_source, a_address, a_size)._2
|
||||
val put = edge.Put(a_source, a_address, a_size, a_data)._2
|
||||
@ -99,9 +97,10 @@ class IOMSHR(id: Int)(implicit edge: TLEdgeOut, p: Parameters) extends L1HellaCa
|
||||
M_XA_MAXU -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MAXU)._2))
|
||||
} else {
|
||||
// If no managers support atomics, assert fail if processor asks for them
|
||||
assert (!isAMO(req.cmd))
|
||||
assert(state === s_idle || !isAMO(req.cmd))
|
||||
Wire(new TLBundleA(edge.bundle))
|
||||
}
|
||||
assert(state === s_idle || req.cmd =/= M_XSC)
|
||||
|
||||
io.mem_access.valid := (state === s_mem_access)
|
||||
io.mem_access.bits := Mux(isAMO(req.cmd), atomics, Mux(isRead(req.cmd), get, put))
|
||||
@ -110,7 +109,7 @@ class IOMSHR(id: Int)(implicit edge: TLEdgeOut, p: Parameters) extends L1HellaCa
|
||||
io.resp.valid := (state === s_resp)
|
||||
io.resp.bits := req
|
||||
io.resp.bits.has_data := isRead(req.cmd)
|
||||
io.resp.bits.data := loadgen.data | req_cmd_sc
|
||||
io.resp.bits.data := loadgen.data
|
||||
io.resp.bits.store_data := req.data
|
||||
io.resp.bits.replay := Bool(true)
|
||||
|
||||
@ -696,6 +695,8 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
|
||||
val s1_read = isRead(s1_req.cmd)
|
||||
val s1_write = isWrite(s1_req.cmd)
|
||||
val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd)
|
||||
// check for unsupported operations
|
||||
assert(!s1_valid || !s1_req.cmd.isOneOf(M_PWR))
|
||||
|
||||
val dtlb = Module(new TLB(log2Ceil(coreDataBytes), nTLBEntries))
|
||||
io.ptw <> dtlb.io.ptw
|
||||
@ -703,7 +704,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
|
||||
dtlb.io.req.bits.sfence.valid := s1_sfence
|
||||
dtlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0)
|
||||
dtlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1)
|
||||
dtlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data
|
||||
dtlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data.data
|
||||
dtlb.io.req.bits.passthrough := s1_req.phys
|
||||
dtlb.io.req.bits.vaddr := s1_req.addr
|
||||
dtlb.io.req.bits.instruction := Bool(false)
|
||||
@ -736,7 +737,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
|
||||
s2_req.phys := s1_req.phys
|
||||
s2_req.addr := s1_addr
|
||||
when (s1_write) {
|
||||
s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data)
|
||||
s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data.data)
|
||||
}
|
||||
when (s1_recycled) { s2_req.data := s1_req.data }
|
||||
s2_req.tag := s1_req.tag
|
||||
@ -927,10 +928,9 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
|
||||
val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits)))
|
||||
val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass)
|
||||
val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word, s2_sc, wordBytes)
|
||||
|
||||
amoalu.io.addr := s2_req.addr
|
||||
|
||||
amoalu.io.mask := new StoreGen(s2_req.typ, s2_req.addr, 0.U, xLen/8).mask
|
||||
amoalu.io.cmd := s2_req.cmd
|
||||
amoalu.io.typ := s2_req.typ
|
||||
amoalu.io.lhs := s2_data_word
|
||||
amoalu.io.rhs := s2_req.data
|
||||
|
||||
@ -972,6 +972,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
|
||||
io.cpu.s2_nack := s2_valid && s2_nack
|
||||
io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp)
|
||||
io.cpu.resp.bits.data_word_bypass := loadgen.wordData
|
||||
io.cpu.resp.bits.data_raw := s2_data_word
|
||||
io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
|
||||
io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next
|
||||
|
||||
|
@ -394,7 +394,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||
mem_reg_pc := ex_reg_pc
|
||||
mem_reg_wdata := alu.io.out
|
||||
when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc)) {
|
||||
mem_reg_rs2 := ex_rs(1)
|
||||
val typ = Mux(ex_ctrl.rocc, log2Ceil(xLen/8).U, ex_ctrl.mem_type)
|
||||
mem_reg_rs2 := new uncore.util.StoreGen(typ, 0.U, ex_rs(1), coreDataBytes).data
|
||||
}
|
||||
}
|
||||
|
||||
@ -625,7 +626,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
|
||||
io.dmem.req.bits.phys := Bool(false)
|
||||
io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out)
|
||||
io.dmem.invalidate_lr := wb_xcpt
|
||||
io.dmem.s1_data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
|
||||
io.dmem.s1_data.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
|
||||
io.dmem.s1_kill := killm_common || mem_breakpoint
|
||||
|
||||
io.rocc.cmd.valid := wb_reg_valid && wb_ctrl.rocc && !replay_wb_common
|
||||
|
@ -22,9 +22,9 @@ class ScratchpadSlavePort(address: AddressSet)(implicit p: Parameters) extends L
|
||||
resources = device.reg,
|
||||
regionType = RegionType.UNCACHED,
|
||||
executable = true,
|
||||
supportsArithmetic = if (usingAtomics) TransferSizes(1, coreDataBytes) else TransferSizes.none,
|
||||
supportsLogical = if (usingAtomics) TransferSizes(1, coreDataBytes) else TransferSizes.none,
|
||||
supportsPutPartial = TransferSizes.none, // Can't support PutPartial
|
||||
supportsArithmetic = if (usingAtomics) TransferSizes(4, coreDataBytes) else TransferSizes.none,
|
||||
supportsLogical = if (usingAtomics) TransferSizes(4, coreDataBytes) else TransferSizes.none,
|
||||
supportsPutPartial = TransferSizes(1, coreDataBytes),
|
||||
supportsPutFull = TransferSizes(1, coreDataBytes),
|
||||
supportsGet = TransferSizes(1, coreDataBytes),
|
||||
fifoId = Some(0))), // requests handled in FIFO order
|
||||
@ -48,13 +48,14 @@ class ScratchpadSlavePort(address: AddressSet)(implicit p: Parameters) extends L
|
||||
when (io.dmem.req.fire()) { state := s_wait }
|
||||
|
||||
val acq = Reg(tl_in.a.bits)
|
||||
when (io.dmem.resp.valid) { acq.data := io.dmem.resp.bits.data }
|
||||
when (io.dmem.resp.valid) { acq.data := io.dmem.resp.bits.data_raw }
|
||||
when (tl_in.a.fire()) { acq := tl_in.a.bits }
|
||||
|
||||
def formCacheReq(a: TLBundleA) = {
|
||||
val req = Wire(new HellaCacheReq)
|
||||
req.cmd := MuxLookup(a.opcode, Wire(M_XRD), Array(
|
||||
TLMessages.PutFullData -> M_XWR,
|
||||
TLMessages.PutPartialData -> M_PWR,
|
||||
TLMessages.ArithmeticData -> MuxLookup(a.param, Wire(M_XRD), Array(
|
||||
TLAtomics.MIN -> M_XA_MIN,
|
||||
TLAtomics.MAX -> M_XA_MAX,
|
||||
@ -67,9 +68,8 @@ class ScratchpadSlavePort(address: AddressSet)(implicit p: Parameters) extends L
|
||||
TLAtomics.AND -> M_XA_AND,
|
||||
TLAtomics.SWAP -> M_XA_SWAP)),
|
||||
TLMessages.Get -> M_XRD))
|
||||
// treat all loads as full words, so bytes appear in correct lane
|
||||
req.typ := Mux(edge.hasData(a), a.size, log2Ceil(coreDataBytes))
|
||||
req.addr := Mux(edge.hasData(a), a.address, ~(~a.address | (coreDataBytes-1)))
|
||||
req.typ := a.size
|
||||
req.addr := a.address
|
||||
req.tag := UInt(0)
|
||||
req.phys := true
|
||||
req
|
||||
@ -79,23 +79,16 @@ class ScratchpadSlavePort(address: AddressSet)(implicit p: Parameters) extends L
|
||||
io.dmem.req.valid := (tl_in.a.valid && ready) || state === s_replay
|
||||
tl_in.a.ready := io.dmem.req.ready && ready
|
||||
io.dmem.req.bits := formCacheReq(Mux(state === s_replay, acq, tl_in.a.bits))
|
||||
// the TL data is already in the correct byte lane, but the D$
|
||||
// expects right-justified store data, so that it can steer the bytes.
|
||||
io.dmem.s1_data := new LoadGen(acq.size, Bool(false), acq.address(log2Ceil(coreDataBytes)-1,0), acq.data, Bool(false), coreDataBytes).data
|
||||
io.dmem.s1_data.data := acq.data
|
||||
io.dmem.s1_data.mask := acq.mask
|
||||
io.dmem.s1_kill := false
|
||||
io.dmem.invalidate_lr := false
|
||||
|
||||
// place AMO data in correct word lane
|
||||
val minAMOBytes = 4
|
||||
val grantData = Mux(io.dmem.resp.valid, io.dmem.resp.bits.data, acq.data)
|
||||
val alignedGrantData =
|
||||
Mux(edge.hasData(acq) && (acq.size <= log2Ceil(minAMOBytes)), Fill(coreDataBytes/minAMOBytes, grantData(8*minAMOBytes-1, 0)), grantData)
|
||||
|
||||
tl_in.d.valid := io.dmem.resp.valid || state === s_grant
|
||||
tl_in.d.bits := Mux(acq.opcode === TLMessages.PutFullData,
|
||||
tl_in.d.bits := Mux(acq.opcode.isOneOf(TLMessages.PutFullData, TLMessages.PutPartialData),
|
||||
edge.AccessAck(acq, UInt(0)),
|
||||
edge.AccessAck(acq, UInt(0), UInt(0)))
|
||||
tl_in.d.bits.data := alignedGrantData
|
||||
tl_in.d.bits.data := Mux(io.dmem.resp.valid, io.dmem.resp.bits.data_raw, acq.data)
|
||||
|
||||
// Tie off unused channels
|
||||
tl_in.b.valid := Bool(false)
|
||||
|
@ -123,7 +123,7 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module
|
||||
io.cache.invalidate_lr := io.requestor.invalidate_lr
|
||||
io.cache.req <> req_arb.io.out
|
||||
io.cache.s1_kill := io.cache.s2_nack
|
||||
io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
|
||||
io.cache.s1_data.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
|
||||
|
||||
replayq.io.nack.valid := (io.cache.s2_nack || s2_kill) && s2_req_fire
|
||||
replayq.io.nack.bits := s2_req_tag
|
||||
|
Reference in New Issue
Block a user