1
0

Support PutPartial in ScratchpadSlavePort

This commit is contained in:
Andrew Waterman 2017-05-02 03:04:41 -07:00
parent 938b089543
commit 3a1a37d41b
10 changed files with 64 additions and 68 deletions

View File

@ -121,7 +121,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
tlb.io.req.bits.sfence.valid := s1_sfence tlb.io.req.bits.sfence.valid := s1_sfence
tlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0) tlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0)
tlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1) tlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1)
tlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data tlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data.data
tlb.io.req.bits.passthrough := s1_req.phys tlb.io.req.bits.passthrough := s1_req.phys
tlb.io.req.bits.vaddr := s1_req.addr tlb.io.req.bits.vaddr := s1_req.addr
tlb.io.req.bits.instruction := false tlb.io.req.bits.instruction := false
@ -155,6 +155,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
} }
val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way) val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way)
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical
val s1_mask = Mux(s1_req.cmd === M_PWR, io.cpu.s1_data.mask, new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).mask)
val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false)) && !io.cpu.s2_xcpt.asUInt.orR val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false)) && !io.cpu.s2_xcpt.asUInt.orR
val s2_probe = Reg(next=s1_probe, init=Bool(false)) val s2_probe = Reg(next=s1_probe, init=Bool(false))
@ -229,10 +230,10 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write) val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write)
val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write) val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write)
val pstore1_addr = RegEnable(s1_paddr, s1_valid_not_nacked && s1_write) val pstore1_addr = RegEnable(s1_paddr, s1_valid_not_nacked && s1_write)
val pstore1_data = RegEnable(io.cpu.s1_data, s1_valid_not_nacked && s1_write) val pstore1_data = RegEnable(io.cpu.s1_data.data, s1_valid_not_nacked && s1_write)
val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write) val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write)
val pstore1_storegen = new StoreGen(pstore1_typ, pstore1_addr, pstore1_data, wordBytes) val pstore1_mask = RegEnable(s1_mask, s1_valid_not_nacked && s1_write)
val pstore1_storegen_data = Wire(init = pstore1_storegen.data) val pstore1_storegen_data = Wire(init = pstore1_data)
val pstore1_amo = Bool(usingAtomics) && isRead(pstore1_cmd) val pstore1_amo = Bool(usingAtomics) && isRead(pstore1_cmd)
val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_amo) val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_amo)
val pstore_drain_opportunistic = !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)) val pstore_drain_opportunistic = !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd))
@ -252,21 +253,20 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1) val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1)
val pstore2_way = RegEnable(pstore1_way, advance_pstore1) val pstore2_way = RegEnable(pstore1_way, advance_pstore1)
val pstore2_storegen_data = RegEnable(pstore1_storegen_data, advance_pstore1) val pstore2_storegen_data = RegEnable(pstore1_storegen_data, advance_pstore1)
val pstore2_storegen_mask = RegEnable(pstore1_storegen.mask, advance_pstore1) val pstore2_storegen_mask = RegEnable(pstore1_mask, advance_pstore1)
dataArb.io.in(0).valid := pstore_drain dataArb.io.in(0).valid := pstore_drain
dataArb.io.in(0).bits.write := true dataArb.io.in(0).bits.write := true
dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr) dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr)
dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way) dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way)
dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data)) dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data))
val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits
dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_storegen.mask) << pstore_mask_shift dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_mask) << pstore_mask_shift
// store->load RAW hazard detection // store->load RAW hazard detection
val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes)
val s1_idx = s1_req.addr(idxMSB, wordOffBits) val s1_idx = s1_req.addr(idxMSB, wordOffBits)
val s1_raw_hazard = s1_read && val s1_raw_hazard = s1_read &&
((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_storegen.mask & s1_storegen.mask).orR) || ((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_mask & s1_mask).orR) ||
(pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx && (pstore2_storegen_mask & s1_storegen.mask).orR)) (pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx && (pstore2_storegen_mask & s1_mask).orR))
when (s1_valid && s1_raw_hazard) { s1_nack := true } when (s1_valid && s1_raw_hazard) { s1_nack := true }
metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty) metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty)
@ -279,8 +279,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val a_source = PriorityEncoder(~uncachedInFlight.asUInt << mmioOffset) // skip the MSHR val a_source = PriorityEncoder(~uncachedInFlight.asUInt << mmioOffset) // skip the MSHR
val acquire_address = s2_req_block_addr val acquire_address = s2_req_block_addr
val access_address = s2_req.addr val access_address = s2_req.addr
val a_size = s2_req.typ(MT_SZ-2, 0) val a_size = mtSize(s2_req.typ)
val a_data = Fill(beatWords, pstore1_storegen.data) val a_data = Fill(beatWords, pstore1_data)
val acquire = if (edge.manager.anySupportAcquireB) { val acquire = if (edge.manager.anySupportAcquireB) {
edge.Acquire(UInt(0), acquire_address, lgCacheBlockBytes, s2_grow_param)._2 // Cacheability checked by tlb edge.Acquire(UInt(0), acquire_address, lgCacheBlockBytes, s2_grow_param)._2 // Cacheability checked by tlb
} else { } else {
@ -523,9 +523,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
// AMOs // AMOs
if (usingAtomics) { if (usingAtomics) {
val amoalu = Module(new AMOALU(xLen)) val amoalu = Module(new AMOALU(xLen))
amoalu.io.addr := pstore1_addr amoalu.io.mask := pstore1_mask
amoalu.io.cmd := pstore1_cmd amoalu.io.cmd := pstore1_cmd
amoalu.io.typ := pstore1_typ
amoalu.io.lhs := s2_data_word amoalu.io.lhs := s2_data_word
amoalu.io.rhs := pstore1_data amoalu.io.rhs := pstore1_data
pstore1_storegen_data := amoalu.io.out pstore1_storegen_data := amoalu.io.out

View File

@ -118,11 +118,16 @@ class HellaCacheExceptions extends Bundle {
val ae = new AlignmentExceptions val ae = new AlignmentExceptions
} }
// Write-data bundle: a data word together with an accompanying mask.
// HellaCacheIO carries one of these as its s1_data output.
class HellaCacheWriteData(implicit p: Parameters) extends CoreBundle()(p) {
// data word, coreDataBits wide
val data = UInt(width = coreDataBits)
// mask, coreDataBytes wide
// NOTE(review): presumably one enable bit per byte of `data` -- confirm against the consumers
val mask = UInt(width = coreDataBytes)
}
// interface between D$ and processor/DTLB // interface between D$ and processor/DTLB
class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Decoupled(new HellaCacheReq) val req = Decoupled(new HellaCacheReq)
val s1_kill = Bool(OUTPUT) // kill previous cycle's req val s1_kill = Bool(OUTPUT) // kill previous cycle's req
val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req val s1_data = new HellaCacheWriteData().asOutput // data for previous cycle's req
val s2_nack = Bool(INPUT) // req from two cycles ago is rejected val s2_nack = Bool(INPUT) // req from two cycles ago is rejected
// performance events // performance events

View File

@ -129,7 +129,7 @@ class IDecode(implicit val p: Parameters) extends DecodeConstants
class SDecode(implicit val p: Parameters) extends DecodeConstants class SDecode(implicit val p: Parameters) extends DecodeConstants
{ {
val table: Array[(BitPat, List[BitPat])] = Array( val table: Array[(BitPat, List[BitPat])] = Array(
SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_SFENCE, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N), SFENCE_VMA->List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_SFENCE, MT_W, N,N,N,N,N,N,CSR.N,N,N,N,N),
SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N)) SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N,N))
} }

View File

@ -69,20 +69,18 @@ class IOMSHR(id: Int)(implicit edge: TLEdgeOut, p: Parameters) extends L1HellaCa
} }
val req = Reg(new HellaCacheReq) val req = Reg(new HellaCacheReq)
val req_cmd_sc = req.cmd === M_XSC
val grant_word = Reg(UInt(width = wordBits)) val grant_word = Reg(UInt(width = wordBits))
val s_idle :: s_mem_access :: s_mem_ack :: s_resp :: Nil = Enum(Bits(), 4) val s_idle :: s_mem_access :: s_mem_ack :: s_resp :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_idle) val state = Reg(init = s_idle)
io.req.ready := (state === s_idle) io.req.ready := (state === s_idle)
val storegen = new StoreGen(req.typ, req.addr, req.data, wordBytes) val loadgen = new LoadGen(req.typ, mtSigned(req.typ), req.addr, grant_word, false.B, wordBytes)
val loadgen = new LoadGen(req.typ, mtSigned(req.typ), req.addr, grant_word, req_cmd_sc, wordBytes)
val a_source = UInt(id) val a_source = UInt(id)
val a_address = req.addr val a_address = req.addr
val a_size = storegen.size val a_size = mtSize(req.typ)
val a_data = Fill(beatWords, storegen.data) val a_data = Fill(beatWords, req.data)
val get = edge.Get(a_source, a_address, a_size)._2 val get = edge.Get(a_source, a_address, a_size)._2
val put = edge.Put(a_source, a_address, a_size, a_data)._2 val put = edge.Put(a_source, a_address, a_size, a_data)._2
@ -99,9 +97,10 @@ class IOMSHR(id: Int)(implicit edge: TLEdgeOut, p: Parameters) extends L1HellaCa
M_XA_MAXU -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MAXU)._2)) M_XA_MAXU -> edge.Arithmetic(a_source, a_address, a_size, a_data, TLAtomics.MAXU)._2))
} else { } else {
// If no managers support atomics, assert fail if processor asks for them // If no managers support atomics, assert fail if processor asks for them
assert (!isAMO(req.cmd)) assert(state === s_idle || !isAMO(req.cmd))
Wire(new TLBundleA(edge.bundle)) Wire(new TLBundleA(edge.bundle))
} }
assert(state === s_idle || req.cmd =/= M_XSC)
io.mem_access.valid := (state === s_mem_access) io.mem_access.valid := (state === s_mem_access)
io.mem_access.bits := Mux(isAMO(req.cmd), atomics, Mux(isRead(req.cmd), get, put)) io.mem_access.bits := Mux(isAMO(req.cmd), atomics, Mux(isRead(req.cmd), get, put))
@ -110,7 +109,7 @@ class IOMSHR(id: Int)(implicit edge: TLEdgeOut, p: Parameters) extends L1HellaCa
io.resp.valid := (state === s_resp) io.resp.valid := (state === s_resp)
io.resp.bits := req io.resp.bits := req
io.resp.bits.has_data := isRead(req.cmd) io.resp.bits.has_data := isRead(req.cmd)
io.resp.bits.data := loadgen.data | req_cmd_sc io.resp.bits.data := loadgen.data
io.resp.bits.store_data := req.data io.resp.bits.store_data := req.data
io.resp.bits.replay := Bool(true) io.resp.bits.replay := Bool(true)
@ -696,6 +695,8 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
val s1_read = isRead(s1_req.cmd) val s1_read = isRead(s1_req.cmd)
val s1_write = isWrite(s1_req.cmd) val s1_write = isWrite(s1_req.cmd)
val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd)
// check for unsupported operations
assert(!s1_valid || !s1_req.cmd.isOneOf(M_PWR))
val dtlb = Module(new TLB(log2Ceil(coreDataBytes), nTLBEntries)) val dtlb = Module(new TLB(log2Ceil(coreDataBytes), nTLBEntries))
io.ptw <> dtlb.io.ptw io.ptw <> dtlb.io.ptw
@ -703,7 +704,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
dtlb.io.req.bits.sfence.valid := s1_sfence dtlb.io.req.bits.sfence.valid := s1_sfence
dtlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0) dtlb.io.req.bits.sfence.bits.rs1 := s1_req.typ(0)
dtlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1) dtlb.io.req.bits.sfence.bits.rs2 := s1_req.typ(1)
dtlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data dtlb.io.req.bits.sfence.bits.asid := io.cpu.s1_data.data
dtlb.io.req.bits.passthrough := s1_req.phys dtlb.io.req.bits.passthrough := s1_req.phys
dtlb.io.req.bits.vaddr := s1_req.addr dtlb.io.req.bits.vaddr := s1_req.addr
dtlb.io.req.bits.instruction := Bool(false) dtlb.io.req.bits.instruction := Bool(false)
@ -736,7 +737,7 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
s2_req.phys := s1_req.phys s2_req.phys := s1_req.phys
s2_req.addr := s1_addr s2_req.addr := s1_addr
when (s1_write) { when (s1_write) {
s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data) s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data.data)
} }
when (s1_recycled) { s2_req.data := s1_req.data } when (s1_recycled) { s2_req.data := s1_req.data }
s2_req.tag := s1_req.tag s2_req.tag := s1_req.tag
@ -927,10 +928,9 @@ class NonBlockingDCacheModule(outer: NonBlockingDCache) extends HellaCacheModule
val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits)))
val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass)
val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word, s2_sc, wordBytes) val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word, s2_sc, wordBytes)
amoalu.io.addr := s2_req.addr amoalu.io.mask := new StoreGen(s2_req.typ, s2_req.addr, 0.U, xLen/8).mask
amoalu.io.cmd := s2_req.cmd amoalu.io.cmd := s2_req.cmd
amoalu.io.typ := s2_req.typ
amoalu.io.lhs := s2_data_word amoalu.io.lhs := s2_data_word
amoalu.io.rhs := s2_req.data amoalu.io.rhs := s2_req.data

View File

@ -394,7 +394,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
mem_reg_pc := ex_reg_pc mem_reg_pc := ex_reg_pc
mem_reg_wdata := alu.io.out mem_reg_wdata := alu.io.out
when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc)) { when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc)) {
mem_reg_rs2 := ex_rs(1) val typ = Mux(ex_ctrl.rocc, log2Ceil(xLen/8).U, ex_ctrl.mem_type)
mem_reg_rs2 := new uncore.util.StoreGen(typ, 0.U, ex_rs(1), coreDataBytes).data
} }
} }
@ -625,7 +626,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
io.dmem.req.bits.phys := Bool(false) io.dmem.req.bits.phys := Bool(false)
io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out) io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out)
io.dmem.invalidate_lr := wb_xcpt io.dmem.invalidate_lr := wb_xcpt
io.dmem.s1_data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) io.dmem.s1_data.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
io.dmem.s1_kill := killm_common || mem_breakpoint io.dmem.s1_kill := killm_common || mem_breakpoint
io.rocc.cmd.valid := wb_reg_valid && wb_ctrl.rocc && !replay_wb_common io.rocc.cmd.valid := wb_reg_valid && wb_ctrl.rocc && !replay_wb_common

View File

@ -24,7 +24,7 @@ class ScratchpadSlavePort(address: AddressSet)(implicit p: Parameters) extends L
executable = true, executable = true,
supportsArithmetic = if (usingAtomics) TransferSizes(4, coreDataBytes) else TransferSizes.none, supportsArithmetic = if (usingAtomics) TransferSizes(4, coreDataBytes) else TransferSizes.none,
supportsLogical = if (usingAtomics) TransferSizes(4, coreDataBytes) else TransferSizes.none, supportsLogical = if (usingAtomics) TransferSizes(4, coreDataBytes) else TransferSizes.none,
supportsPutPartial = TransferSizes.none, // Can't support PutPartial supportsPutPartial = TransferSizes(1, coreDataBytes),
supportsPutFull = TransferSizes(1, coreDataBytes), supportsPutFull = TransferSizes(1, coreDataBytes),
supportsGet = TransferSizes(1, coreDataBytes), supportsGet = TransferSizes(1, coreDataBytes),
fifoId = Some(0))), // requests handled in FIFO order fifoId = Some(0))), // requests handled in FIFO order
@ -55,6 +55,7 @@ class ScratchpadSlavePort(address: AddressSet)(implicit p: Parameters) extends L
val req = Wire(new HellaCacheReq) val req = Wire(new HellaCacheReq)
req.cmd := MuxLookup(a.opcode, Wire(M_XRD), Array( req.cmd := MuxLookup(a.opcode, Wire(M_XRD), Array(
TLMessages.PutFullData -> M_XWR, TLMessages.PutFullData -> M_XWR,
TLMessages.PutPartialData -> M_PWR,
TLMessages.ArithmeticData -> MuxLookup(a.param, Wire(M_XRD), Array( TLMessages.ArithmeticData -> MuxLookup(a.param, Wire(M_XRD), Array(
TLAtomics.MIN -> M_XA_MIN, TLAtomics.MIN -> M_XA_MIN,
TLAtomics.MAX -> M_XA_MAX, TLAtomics.MAX -> M_XA_MAX,
@ -67,9 +68,8 @@ class ScratchpadSlavePort(address: AddressSet)(implicit p: Parameters) extends L
TLAtomics.AND -> M_XA_AND, TLAtomics.AND -> M_XA_AND,
TLAtomics.SWAP -> M_XA_SWAP)), TLAtomics.SWAP -> M_XA_SWAP)),
TLMessages.Get -> M_XRD)) TLMessages.Get -> M_XRD))
// treat all loads as full words, so bytes appear in correct lane req.typ := a.size
req.typ := Mux(edge.hasData(a), a.size, log2Ceil(coreDataBytes)) req.addr := a.address
req.addr := Mux(edge.hasData(a), a.address, ~(~a.address | (coreDataBytes-1)))
req.tag := UInt(0) req.tag := UInt(0)
req.phys := true req.phys := true
req req
@ -79,14 +79,13 @@ class ScratchpadSlavePort(address: AddressSet)(implicit p: Parameters) extends L
io.dmem.req.valid := (tl_in.a.valid && ready) || state === s_replay io.dmem.req.valid := (tl_in.a.valid && ready) || state === s_replay
tl_in.a.ready := io.dmem.req.ready && ready tl_in.a.ready := io.dmem.req.ready && ready
io.dmem.req.bits := formCacheReq(Mux(state === s_replay, acq, tl_in.a.bits)) io.dmem.req.bits := formCacheReq(Mux(state === s_replay, acq, tl_in.a.bits))
// the TL data is already in the correct byte lane, but the D$ io.dmem.s1_data.data := acq.data
// expects right-justified store data, so that it can steer the bytes. io.dmem.s1_data.mask := acq.mask
io.dmem.s1_data := new LoadGen(acq.size, Bool(false), acq.address(log2Ceil(coreDataBytes)-1,0), acq.data, Bool(false), coreDataBytes).data
io.dmem.s1_kill := false io.dmem.s1_kill := false
io.dmem.invalidate_lr := false io.dmem.invalidate_lr := false
tl_in.d.valid := io.dmem.resp.valid || state === s_grant tl_in.d.valid := io.dmem.resp.valid || state === s_grant
tl_in.d.bits := Mux(acq.opcode === TLMessages.PutFullData, tl_in.d.bits := Mux(acq.opcode.isOneOf(TLMessages.PutFullData, TLMessages.PutPartialData),
edge.AccessAck(acq, UInt(0)), edge.AccessAck(acq, UInt(0)),
edge.AccessAck(acq, UInt(0), UInt(0))) edge.AccessAck(acq, UInt(0), UInt(0)))
tl_in.d.bits.data := Mux(io.dmem.resp.valid, io.dmem.resp.bits.data_raw, acq.data) tl_in.d.bits.data := Mux(io.dmem.resp.valid, io.dmem.resp.bits.data_raw, acq.data)

View File

@ -123,7 +123,7 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module
io.cache.invalidate_lr := io.requestor.invalidate_lr io.cache.invalidate_lr := io.requestor.invalidate_lr
io.cache.req <> req_arb.io.out io.cache.req <> req_arb.io.out
io.cache.s1_kill := io.cache.s2_nack io.cache.s1_kill := io.cache.s2_nack
io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) io.cache.s1_data.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
replayq.io.nack.valid := (io.cache.s2_nack || s2_kill) && s2_req_fire replayq.io.nack.valid := (io.cache.s2_nack || s2_kill) && s2_req_fire
replayq.io.nack.bits := s2_req_tag replayq.io.nack.bits := s2_req_tag

View File

@ -392,7 +392,7 @@ class FPToInt(implicit p: Parameters) extends FPUModule()(p) {
val store = ieee(in.in1) val store = ieee(in.in1)
val toint = Mux(in.rm(0), classify_out, store) val toint = Mux(in.rm(0), classify_out, store)
io.out.bits.store := store io.out.bits.store := Mux(in.singleOut, Fill(xLen/32, store(31, 0)), store)
io.out.bits.toint := Mux(in.singleOut, toint(31, 0).sextTo(xLen), toint) io.out.bits.toint := Mux(in.singleOut, toint(31, 0).sextTo(xLen), toint)
io.out.bits.exc := Bits(0) io.out.bits.exc := Bits(0)

View File

@ -28,7 +28,8 @@ trait MemoryOpConstants {
def M_XA_MINU = UInt("b01110"); def M_XA_MINU = UInt("b01110");
def M_XA_MAXU = UInt("b01111"); def M_XA_MAXU = UInt("b01111");
def M_FLUSH = UInt("b10000") // write back dirty data and cede R/W permissions def M_FLUSH = UInt("b10000") // write back dirty data and cede R/W permissions
def M_PRODUCE = UInt("b10001") // write back dirty data and cede W permissions def M_PWR = UInt("b10001") // partial (masked) store
def M_PRODUCE = UInt("b10010") // write back dirty data and cede W permissions
def M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions def M_CLEAN = UInt("b10011") // write back dirty data and retain R/W permissions
def M_SFENCE = UInt("b10100") // flush TLB def M_SFENCE = UInt("b10100") // flush TLB
@ -37,7 +38,7 @@ trait MemoryOpConstants {
def isAMO(cmd: UInt) = isAMOLogical(cmd) || isAMOArithmetic(cmd) def isAMO(cmd: UInt) = isAMOLogical(cmd) || isAMOArithmetic(cmd)
def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW
def isRead(cmd: UInt) = cmd === M_XRD || cmd === M_XLR || cmd === M_XSC || isAMO(cmd) def isRead(cmd: UInt) = cmd === M_XRD || cmd === M_XLR || cmd === M_XSC || isAMO(cmd)
def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_XSC || isAMO(cmd) def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_PWR || cmd === M_XSC || isAMO(cmd)
def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR
} }

View File

@ -31,10 +31,6 @@ class StoreGen(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) {
def wordData = genData(2) def wordData = genData(2)
} }
// StoreGen variant whose genData is overridden to hand back `dat` unchanged,
// whatever index is requested; everything else is inherited from StoreGen.
class StoreGenAligned(typ: UInt, addr: UInt, dat: UInt, maxSize: Int) extends StoreGen(typ, addr, dat, maxSize) {
// ignore i and return the data exactly as supplied
override def genData(i: Int) = dat
}
class LoadGen(typ: UInt, signed: Bool, addr: UInt, dat: UInt, zero: Bool, maxSize: Int) { class LoadGen(typ: UInt, signed: Bool, addr: UInt, dat: UInt, zero: Bool, maxSize: Int) {
private val size = new StoreGen(typ, addr, dat, maxSize).size private val size = new StoreGen(typ, addr, dat, maxSize).size
@ -54,22 +50,16 @@ class LoadGen(typ: UInt, signed: Bool, addr: UInt, dat: UInt, zero: Bool, maxSiz
def data = genData(0) def data = genData(0)
} }
class AMOALU(operandBits: Int, rhsIsAligned: Boolean = false)(implicit p: Parameters) extends Module { class AMOALU(operandBits: Int)(implicit p: Parameters) extends Module {
require(operandBits == 32 || operandBits == 64) require(operandBits == 32 || operandBits == 64)
val io = new Bundle { val io = new Bundle {
val addr = Bits(INPUT, log2Ceil(operandBits/8)) val mask = UInt(INPUT, operandBits/8)
val cmd = Bits(INPUT, M_SZ) val cmd = Bits(INPUT, M_SZ)
val typ = Bits(INPUT, log2Ceil(log2Ceil(operandBits/8) + 1))
val lhs = Bits(INPUT, operandBits) val lhs = Bits(INPUT, operandBits)
val rhs = Bits(INPUT, operandBits) val rhs = Bits(INPUT, operandBits)
val out = Bits(OUTPUT, operandBits) val out = Bits(OUTPUT, operandBits)
} }
val storegen =
if(rhsIsAligned) new StoreGenAligned(io.typ, io.addr, io.rhs, operandBits/8)
else new StoreGen(io.typ, io.addr, io.rhs, operandBits/8)
val rhs = storegen.wordData
val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU
val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU
val add = io.cmd === M_XA_ADD val add = io.cmd === M_XA_ADD
@ -77,10 +67,10 @@ class AMOALU(operandBits: Int, rhsIsAligned: Boolean = false)(implicit p: Parame
val logic_xor = io.cmd === M_XA_XOR || io.cmd === M_XA_OR val logic_xor = io.cmd === M_XA_XOR || io.cmd === M_XA_OR
val adder_out = val adder_out =
if (operandBits == 32) io.lhs + rhs if (operandBits == 32) io.lhs + io.rhs
else { else {
val mask = ~UInt(0,64) ^ (io.addr(2) << 31) val mask = ~UInt(0,64) ^ (!io.mask(3) << 31)
(io.lhs & mask) + (rhs & mask) (io.lhs & mask) + (io.rhs & mask)
} }
val less = { val less = {
@ -90,28 +80,29 @@ class AMOALU(operandBits: Int, rhsIsAligned: Boolean = false)(implicit p: Parame
} }
if (operandBits == 32) { if (operandBits == 32) {
Mux(io.lhs(31) === rhs(31), io.lhs < rhs, Mux(sgned, io.lhs(31), io.rhs(31))) Mux(io.lhs(31) === io.rhs(31), io.lhs < io.rhs, Mux(sgned, io.lhs(31), io.rhs(31)))
} else { } else {
val word = !io.typ(0) val cmp_lhs = Mux(!io.mask(4), io.lhs(31), io.lhs(63))
val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63)) val cmp_rhs = Mux(!io.mask(4), io.rhs(31), io.rhs(63))
val cmp_rhs = Mux(word && !io.addr(2), rhs(31), rhs(63)) val lt_lo = io.lhs(31,0) < io.rhs(31,0)
val lt_lo = io.lhs(31,0) < rhs(31,0) val lt_hi = io.lhs(63,32) < io.rhs(63,32)
val lt_hi = io.lhs(63,32) < rhs(63,32) val eq_hi = io.lhs(63,32) === io.rhs(63,32)
val eq_hi = io.lhs(63,32) === rhs(63,32) val lt =
val lt = Mux(word, Mux(io.addr(2), lt_hi, lt_lo), lt_hi || eq_hi && lt_lo) Mux(io.mask(4) && io.mask(3), lt_hi || eq_hi && lt_lo,
Mux(io.mask(4), lt_hi, lt_lo))
Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs)) Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs))
} }
} }
val minmax = Mux(Mux(less, min, max), io.lhs, storegen.data) val minmax = Mux(Mux(less, min, max), io.lhs, io.rhs)
val logic = val logic =
Mux(logic_and, io.lhs & rhs, 0.U) | Mux(logic_and, io.lhs & io.rhs, 0.U) |
Mux(logic_xor, io.lhs ^ rhs, 0.U) Mux(logic_xor, io.lhs ^ io.rhs, 0.U)
val out = val out =
Mux(add, adder_out, Mux(add, adder_out,
Mux(logic_and || logic_xor, logic, Mux(logic_and || logic_xor, logic,
minmax)) minmax))
val wmask = FillInterleaved(8, storegen.mask) val wmask = FillInterleaved(8, io.mask)
io.out := wmask & out | ~wmask & io.lhs io.out := wmask & out | ~wmask & io.lhs
} }