1
0

D$ cleanup - merge ReplayUnit and MSHRFile

This commit is contained in:
Andrew Waterman 2012-03-01 19:30:56 -08:00
parent 52101373e0
commit 28cacd953f
2 changed files with 48 additions and 76 deletions

View File

@ -102,13 +102,8 @@ class RPQEntry extends Bundle {
val tag = Bits(width = DCACHE_TAG_BITS) val tag = Bits(width = DCACHE_TAG_BITS)
} }
class Replay extends Bundle { class Replay extends RPQEntry {
val idx = Bits(width = IDX_BITS) val idx = Bits(width = IDX_BITS)
val offset = Bits(width = OFFSET_BITS)
val cmd = Bits(width = 4)
val typ = Bits(width = 3)
val sdq_id = UFix(width = log2up(NSDQ))
val tag = Bits(width = DCACHE_TAG_BITS)
val way_oh = Bits(width = NWAYS) val way_oh = Bits(width = NWAYS)
} }
@ -118,6 +113,7 @@ class DataReq extends Bundle {
val cmd = Bits(width = 4) val cmd = Bits(width = 4)
val typ = Bits(width = 3) val typ = Bits(width = 3)
val data = Bits(width = CPU_DATA_BITS) val data = Bits(width = CPU_DATA_BITS)
val way_oh = Bits(width = NWAYS)
} }
class DataArrayReq extends Bundle { class DataArrayReq extends Bundle {
@ -271,8 +267,8 @@ class MSHRFile extends Component {
val req_cmd = Bits(4, INPUT) val req_cmd = Bits(4, INPUT)
val req_type = Bits(3, INPUT) val req_type = Bits(3, INPUT)
val req_tag = Bits(DCACHE_TAG_BITS, INPUT) val req_tag = Bits(DCACHE_TAG_BITS, INPUT)
val req_sdq_id = UFix(log2up(NSDQ), INPUT)
val req_way_oh = Bits(NWAYS, INPUT) val req_way_oh = Bits(NWAYS, INPUT)
val req_data = Bits(CPU_DATA_BITS, INPUT)
val mem_resp_val = Bool(INPUT) val mem_resp_val = Bool(INPUT)
val mem_resp_tag = Bits(MEM_TAG_BITS, INPUT) val mem_resp_tag = Bits(MEM_TAG_BITS, INPUT)
@ -283,16 +279,27 @@ class MSHRFile extends Component {
val mem_req = (new ioDecoupled) { new TransactionInit }.flip() val mem_req = (new ioDecoupled) { new TransactionInit }.flip()
val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip() val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip()
val replay = (new ioDecoupled) { new Replay() }.flip() val data_req = (new ioDecoupled) { new DataReq() }.flip()
val cpu_resp_val = Bool(OUTPUT)
val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT)
} }
val sdq_val = Reg(resetVal = Bits(0, NSDQ))
val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0))
val sdq_rdy = !sdq_val.andR
val (req_read, req_write) = cpuCmdToRW(io.req_cmd)
val sdq_enq = io.req_val && io.req_rdy && req_write
val sdq = Mem(NSDQ, sdq_enq, sdq_alloc_id, io.req_data)
sdq.setReadLatency(1);
sdq.setTarget('inst)
val tag_mux = (new Mux1H(NMSHR)){ Bits(width = TAG_BITS) } val tag_mux = (new Mux1H(NMSHR)){ Bits(width = TAG_BITS) }
val mem_resp_idx_mux = (new Mux1H(NMSHR)){ Bits(width = IDX_BITS) } val mem_resp_idx_mux = (new Mux1H(NMSHR)){ Bits(width = IDX_BITS) }
val mem_resp_way_oh_mux = (new Mux1H(NMSHR)){ Bits(width = NWAYS) } val mem_resp_way_oh_mux = (new Mux1H(NMSHR)){ Bits(width = NWAYS) }
val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() }
val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit }
val replay_arb = (new Arbiter(NMSHR)) { new Replay() } val replay_arb = (new Arbiter(NMSHR)) { new Replay() }
val alloc_arb = (new Arbiter(NMSHR)) { Bool() } val alloc_arb = (new Arbiter(NMSHR)) { Bool() }
val tag_match = tag_mux.io.out === io.req_ppn val tag_match = tag_mux.io.out === io.req_ppn
@ -311,14 +318,14 @@ class MSHRFile extends Component {
alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy
mshr.io.req_pri_val := alloc_arb.io.in(i).ready mshr.io.req_pri_val := alloc_arb.io.in(i).ready
mshr.io.req_sec_val := io.req_val && tag_match mshr.io.req_sec_val := io.req_val && sdq_rdy && tag_match
mshr.io.req_ppn := io.req_ppn mshr.io.req_ppn := io.req_ppn
mshr.io.req_tag := io.req_tag mshr.io.req_tag := io.req_tag
mshr.io.req_idx := io.req_idx mshr.io.req_idx := io.req_idx
mshr.io.req_offset := io.req_offset mshr.io.req_offset := io.req_offset
mshr.io.req_cmd := io.req_cmd mshr.io.req_cmd := io.req_cmd
mshr.io.req_type := io.req_type mshr.io.req_type := io.req_type
mshr.io.req_sdq_id := io.req_sdq_id mshr.io.req_sdq_id := sdq_alloc_id
mshr.io.req_way_oh := io.req_way_oh mshr.io.req_way_oh := io.req_way_oh
mshr.io.meta_req <> meta_req_arb.io.in(i) mshr.io.meta_req <> meta_req_arb.io.in(i)
@ -338,57 +345,27 @@ class MSHRFile extends Component {
idx_match = idx_match || mshr.io.idx_match idx_match = idx_match || mshr.io.idx_match
} }
alloc_arb.io.out.ready := io.req_val && !idx_match alloc_arb.io.out.ready := io.req_val && sdq_rdy && !idx_match
meta_req_arb.io.out <> io.meta_req meta_req_arb.io.out <> io.meta_req
mem_req_arb.io.out <> io.mem_req mem_req_arb.io.out <> io.mem_req
replay_arb.io.out <> io.replay
io.req_rdy := Mux(idx_match, tag_match && sec_rdy, pri_rdy) io.req_rdy := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy
io.mem_resp_idx := mem_resp_idx_mux.io.out io.mem_resp_idx := mem_resp_idx_mux.io.out
io.mem_resp_way_oh := mem_resp_way_oh_mux.io.out io.mem_resp_way_oh := mem_resp_way_oh_mux.io.out
io.fence_rdy := !fence io.fence_rdy := !fence
}
class ReplayUnit extends Component { val replay = Queue(replay_arb.io.out, 1, pipe = true)
val io = new Bundle { replay.ready := io.data_req.ready
val sdq_enq = (new ioDecoupled) { Bits(width = CPU_DATA_BITS) } io.data_req <> replay
val sdq_id = UFix(log2up(NSDQ), OUTPUT)
val way_oh = Bits(NWAYS, OUTPUT)
val replay = (new ioDecoupled) { new Replay() }
val data_req = (new ioDecoupled) { new DataReq() }.flip()
val cpu_resp_val = Bool(OUTPUT)
val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT)
}
val sdq_val = Reg(resetVal = Bits(0, NSDQ)) val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd)
val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0)) val sdq_free = replay.valid && replay.ready && replay_write
sdq_val := sdq_val & ~(sdq_free.toUFix << replay.bits.sdq_id) | (sdq_enq.toUFix << sdq_alloc_id)
io.data_req.bits.data := sdq.read(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id))
val rpq = Queue(io.replay, 1, pipe = true) io.cpu_resp_val := Reg(replay.valid && replay.ready && replay_read, resetVal = Bool(false))
rpq.ready := io.data_req.ready io.cpu_resp_tag := Reg(replay.bits.tag)
val (rp_read, rp_write) = cpuCmdToRW(rpq.bits.cmd)
val sdq_wen = io.sdq_enq.valid && io.sdq_enq.ready
val sdq = Mem(NSDQ, sdq_wen, sdq_alloc_id, io.sdq_enq.bits)
sdq.setReadLatency(1);
sdq.setTarget('inst)
val sdq_free = rpq.valid && rpq.ready && rp_write
sdq_val := sdq_val & ~(sdq_free.toUFix << rpq.bits.sdq_id) | (sdq_wen.toUFix << sdq_alloc_id)
io.sdq_enq.ready := !sdq_val.andR
io.sdq_id := sdq_alloc_id
io.data_req.valid := rpq.valid
io.way_oh := rpq.bits.way_oh
io.data_req.bits.idx := rpq.bits.idx
io.data_req.bits.offset := rpq.bits.offset
io.data_req.bits.cmd := rpq.bits.cmd
io.data_req.bits.typ := rpq.bits.typ
io.data_req.bits.data := sdq.read(Mux(rpq.valid && !rpq.ready, rpq.bits.sdq_id, io.replay.bits.sdq_id))
io.cpu_resp_val := Reg(rpq.valid && rpq.ready && rp_read, resetVal = Bool(false))
io.cpu_resp_tag := Reg(rpq.bits.tag)
} }
class WritebackUnit extends Component { class WritebackUnit extends Component {
@ -705,9 +682,8 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence {
val r_req_write = r_req_store || r_req_amo val r_req_write = r_req_store || r_req_amo
val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch
// replay unit val mshr = new MSHRFile()
val replayer = new ReplayUnit() val replay_amo_val = mshr.io.data_req.valid && mshr.io.data_req.bits.cmd(3).toBool
val replay_amo_val = replayer.io.data_req.valid && replayer.io.data_req.bits.cmd(3).toBool
when (io.cpu.req_val) { when (io.cpu.req_val) {
r_cpu_req_idx := io.cpu.req_idx r_cpu_req_idx := io.cpu.req_idx
@ -716,11 +692,11 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence {
r_cpu_req_tag := io.cpu.req_tag r_cpu_req_tag := io.cpu.req_tag
} }
when (replay_amo_val) { when (replay_amo_val) {
r_cpu_req_idx := Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) r_cpu_req_idx := Cat(mshr.io.data_req.bits.idx, mshr.io.data_req.bits.offset)
r_cpu_req_cmd := replayer.io.data_req.bits.cmd r_cpu_req_cmd := mshr.io.data_req.bits.cmd
r_cpu_req_type := replayer.io.data_req.bits.typ r_cpu_req_type := mshr.io.data_req.bits.typ
r_amo_replay_data := replayer.io.data_req.bits.data r_amo_replay_data := mshr.io.data_req.bits.data
r_way_oh := replayer.io.way_oh r_way_oh := mshr.io.data_req.bits.way_oh
} }
val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data)
@ -816,7 +792,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence {
data_arb.io.in(2).bits.way_en := p_store_way_oh data_arb.io.in(2).bits.way_en := p_store_way_oh
val drain_store = drain_store_val && data_arb.io.in(2).ready val drain_store = drain_store_val && data_arb.io.in(2).ready
val p_amo = Reg(resetVal = Bool(false)) val p_amo = Reg(resetVal = Bool(false))
val p_store_rdy = !(p_store_valid && !drain_store) && !(replayer.io.data_req.valid || r_replay_amo || p_amo) val p_store_rdy = !(p_store_valid && !drain_store) && !(mshr.io.data_req.valid || r_replay_amo || p_amo)
p_amo := tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo p_amo := tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo
p_store_valid := p_store_valid && !drain_store || (tag_hit && r_req_store && p_store_rdy) || p_amo p_store_valid := p_store_valid && !drain_store || (tag_hit && r_req_store && p_store_rdy) || p_amo
@ -852,39 +828,35 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence {
} }
// miss handling // miss handling
val mshr = new MSHRFile() mshr.io.req_val := tag_miss && r_req_readwrite && (!needs_writeback || wb_rdy)
mshr.io.req_val := tag_miss && r_req_readwrite && (!needs_writeback || wb_rdy) && (!r_req_write || replayer.io.sdq_enq.ready)
mshr.io.req_ppn := cpu_req_tag mshr.io.req_ppn := cpu_req_tag
mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb)
mshr.io.req_tag := r_cpu_req_tag mshr.io.req_tag := r_cpu_req_tag
mshr.io.req_offset := r_cpu_req_idx(offsetmsb,0) mshr.io.req_offset := r_cpu_req_idx(offsetmsb,0)
mshr.io.req_cmd := r_cpu_req_cmd mshr.io.req_cmd := r_cpu_req_cmd
mshr.io.req_type := r_cpu_req_type mshr.io.req_type := r_cpu_req_type
mshr.io.req_sdq_id := replayer.io.sdq_id
mshr.io.req_way_oh := replaced_way_oh mshr.io.req_way_oh := replaced_way_oh
mshr.io.req_data := cpu_req_data
mshr.io.mem_resp_val := refill_val && (~rr_count === UFix(0)) mshr.io.mem_resp_val := refill_val && (~rr_count === UFix(0))
mshr.io.mem_resp_tag := io.mem.xact_rep.bits.tile_xact_id mshr.io.mem_resp_tag := io.mem.xact_rep.bits.tile_xact_id
mshr.io.mem_req <> wb.io.refill_req mshr.io.mem_req <> wb.io.refill_req
mshr.io.meta_req <> meta_arb.io.in(1) mshr.io.meta_req <> meta_arb.io.in(1)
mshr.io.replay <> replayer.io.replay
replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!needs_writeback || wb_rdy) && mshr.io.req_rdy
replayer.io.sdq_enq.bits := cpu_req_data
data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx
data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh
replacer.io.pick_new_way := !io.cpu.req_kill && mshr.io.req_val && mshr.io.req_rdy replacer.io.pick_new_way := !io.cpu.req_kill && mshr.io.req_val && mshr.io.req_rdy
// replays // replays
val replay = replayer.io.data_req.bits val replay = mshr.io.data_req.bits
val stall_replay = r_replay_amo || p_amo || p_store_valid val stall_replay = r_replay_amo || p_amo || p_store_valid
val replay_val = replayer.io.data_req.valid val replay_val = mshr.io.data_req.valid
val replay_rdy = data_arb.io.in(1).ready && !stall_replay val replay_rdy = data_arb.io.in(1).ready && !stall_replay
val replay_fire = replay_val && replay_rdy val replay_fire = replay_val && replay_rdy
data_arb.io.in(1).bits.inner_req.offset := replay.offset(offsetmsb,ramindexlsb) data_arb.io.in(1).bits.inner_req.offset := replay.offset(offsetmsb,ramindexlsb)
data_arb.io.in(1).bits.inner_req.idx := replay.idx data_arb.io.in(1).bits.inner_req.idx := replay.idx
data_arb.io.in(1).bits.inner_req.rw := replay.cmd === M_XWR data_arb.io.in(1).bits.inner_req.rw := replay.cmd === M_XWR
data_arb.io.in(1).valid := replay_val && !stall_replay data_arb.io.in(1).valid := replay_val && !stall_replay
data_arb.io.in(1).bits.way_en := replayer.io.way_oh data_arb.io.in(1).bits.way_en := mshr.io.data_req.bits.way_oh
replayer.io.data_req.ready := replay_rdy mshr.io.data_req.ready := replay_rdy
r_replay_amo := replay_amo_val && replay_rdy r_replay_amo := replay_amo_val && replay_rdy
// store write mask generation. // store write mask generation.
@ -932,17 +904,17 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence {
val pending_fence = Reg(resetVal = Bool(false)) val pending_fence = Reg(resetVal = Bool(false))
pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !flush_rdy pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !flush_rdy
val nack_hit = p_store_match || replay_val || r_req_write && !p_store_rdy val nack_hit = p_store_match || replay_val || r_req_write && !p_store_rdy
val nack_miss = needs_writeback && !wb_rdy || !mshr.io.req_rdy || r_req_write && !replayer.io.sdq_enq.ready val nack_miss = needs_writeback && !wb_rdy || !mshr.io.req_rdy
val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) ||
!flushed && r_req_flush !flushed && r_req_flush
val nack = early_nack || r_req_readwrite && Mux(tag_match, nack_hit, nack_miss) || nack_flush val nack = early_nack || r_req_readwrite && Mux(tag_match, nack_hit, nack_miss) || nack_flush
io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence
io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack
io.cpu.resp_val := (tag_hit && !nack_hit && r_req_read) || replayer.io.cpu_resp_val io.cpu.resp_val := (tag_hit && !nack_hit && r_req_read) || mshr.io.cpu_resp_val
io.cpu.resp_replay := replayer.io.cpu_resp_val io.cpu.resp_replay := mshr.io.cpu_resp_val
io.cpu.resp_miss := tag_miss && !nack_miss && r_req_read io.cpu.resp_miss := tag_miss && !nack_miss && r_req_read
io.cpu.resp_tag := Mux(replayer.io.cpu_resp_val, replayer.io.cpu_resp_tag, r_cpu_req_tag) io.cpu.resp_tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag)
io.cpu.resp_type := loadgen.io.typ io.cpu.resp_type := loadgen.io.typ
io.cpu.resp_data := loadgen.io.dout io.cpu.resp_data := loadgen.io.dout
io.cpu.resp_data_subword := loadgen.io.r_dout_subword io.cpu.resp_data_subword := loadgen.io.r_dout_subword

View File

@ -30,7 +30,7 @@ class Top() extends Component {
// connect tile to hub // connect tile to hub
hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init) hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init)
hub.io.tiles(0).xact_init_data <> Queue(arbiter.io.mem.xact_init_data) hub.io.tiles(0).xact_init_data <> Queue(arbiter.io.mem.xact_init_data)
arbiter.io.mem.xact_rep <> PipeReg(hub.io.tiles(0).xact_rep) arbiter.io.mem.xact_rep <> Pipe(hub.io.tiles(0).xact_rep)
// connect hub to memory // connect hub to memory
io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) io.mem.req_cmd <> Queue(hub.io.mem.req_cmd)
io.mem.req_data <> Queue(hub.io.mem.req_data) io.mem.req_data <> Queue(hub.io.mem.req_data)