From 012da6002e4b503e3540e9ae15bd578ad68c8bee Mon Sep 17 00:00:00 2001
From: Andrew Waterman
Date: Wed, 29 Feb 2012 03:08:04 -0800
Subject: [PATCH] replace tile memory interface with ioTileLink

work in progress towards coherent HTIF. for now, requests are
incoherently passed through a null coherence hub.
---
 rocket/src/main/scala/arbiter.scala | 89 +++++++++------
 rocket/src/main/scala/coherence.scala | 31 ++++---
 rocket/src/main/scala/consts.scala | 4 +-
 rocket/src/main/scala/htif.scala | 23 +++---
 rocket/src/main/scala/icache.scala | 27 ++++---
 rocket/src/main/scala/icache_prefetch.scala | 37 ++++-----
 rocket/src/main/scala/nbdcache.scala | 66 +++++++--------
 rocket/src/main/scala/queues.scala | 4 +-
 rocket/src/main/scala/top.scala | 36 ++++++---
 rocket/src/main/scala/util.scala | 6 ++
 10 files changed, 163 insertions(+), 160 deletions(-)

diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala
index c9b442f1..e323cf7a 100644
--- a/rocket/src/main/scala/arbiter.scala
+++ b/rocket/src/main/scala/arbiter.scala
@@ -4,75 +4,64 @@ import Chisel._;
 import Node._;
 import Constants._;
 
-class ioMem() extends Bundle
-{
-  val req_val = Bool(OUTPUT);
-  val req_rdy = Bool(INPUT);
-  val req_rw = Bool(OUTPUT);
-  val req_addr = UFix(PADDR_BITS - OFFSET_BITS, OUTPUT);
-  val req_tag = Bits(MEM_TAG_BITS, OUTPUT);
-
-  val req_data_val = Bool(OUTPUT);
-  val req_data_rdy = Bool(INPUT);
-  val req_data_bits = Bits(MEM_DATA_BITS, OUTPUT);
-
-  val resp_val = Bool(INPUT);
-  val resp_tag = Bits(MEM_TAG_BITS, INPUT);
-  val resp_data = Bits(MEM_DATA_BITS, INPUT);
-}
-
-class ioMemArbiter(n: Int) extends Bundle() {
-  val mem = new ioMem();
-  val requestor = Vec(n) { new ioMem().flip() }
-}
-
 class rocketMemArbiter(n: Int) extends Component {
-  val io = new ioMemArbiter(n);
+  val io = new Bundle {
+    val mem = new ioTileLink
+    val requestor = Vec(n) { new ioTileLink().flip }
+  }
 
   var req_val = Bool(false)
-  var req_rdy = io.mem.req_rdy
+  var req_rdy = io.mem.xact_init.ready
   for (i <- 0 until n)
   {
-    io.requestor(i).req_rdy := req_rdy
-    req_val = req_val || io.requestor(i).req_val
-    req_rdy = req_rdy && !io.requestor(i).req_val
+    io.requestor(i).xact_init.ready := req_rdy
+    req_val = req_val || io.requestor(i).xact_init.valid
+    req_rdy = req_rdy && !io.requestor(i).xact_init.valid
  }
 
+  // if more than one requestor at a time can write back, the data
+  // arbiter needs to be made stateful: one xact's write data must
+  // be sent to the memory system contiguously.
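// [editorial sketch, not part of this patch] One way the data arbiter could be
// made stateful, as the comment above suggests. The component name and the
// locked/lock_idx/beat_cnt registers are hypothetical; ioDecoupled,
// TransactionInitData, REFILL_CYCLES and log2up come from this codebase. Once a
// requestor's first write-data beat is accepted, the grant is pinned to that
// requestor for REFILL_CYCLES beats, so one transaction's write data reaches
// the memory system contiguously.
class LockingDataArbiter(n: Int) extends Component {
  val io = new Bundle {
    val mem = (new ioDecoupled) { new TransactionInitData }.flip
    val requestor = Vec(n) { (new ioDecoupled) { new TransactionInitData } }
  }

  val locked   = Reg(resetVal = Bool(false))
  val lock_idx = Reg(resetVal = UFix(0, log2up(n)))
  val beat_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES)))

  // fixed-priority choice while unlocked; the lock owner while locked
  var choice = UFix(n-1, log2up(n))
  for (i <- n-2 to 0 by -1)
    choice = Mux(io.requestor(i).valid, UFix(i, log2up(n)), choice)
  val grant = Mux(locked, lock_idx, choice)

  // forward only the granted requestor's beats; stall everyone else
  var grant_val  = io.requestor(n-1).valid
  var grant_bits = io.requestor(n-1).bits
  for (i <- n-2 to 0 by -1) {
    grant_val  = Mux(grant === UFix(i), io.requestor(i).valid, grant_val)
    grant_bits = Mux(grant === UFix(i), io.requestor(i).bits, grant_bits)
  }
  io.mem.valid := grant_val
  io.mem.bits  := grant_bits
  for (i <- 0 until n)
    io.requestor(i).ready := io.mem.ready && grant === UFix(i)

  // take the lock on the first accepted beat; release it after the last
  when (io.mem.valid && io.mem.ready) {
    locked   := Bool(true)
    lock_idx := grant
    beat_cnt := beat_cnt + UFix(1)
    when (beat_cnt === UFix(REFILL_CYCLES-1)) { locked := Bool(false) }
  }
}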
var req_data_val = Bool(false) - var req_data_rdy = io.mem.req_data_rdy + var req_data_rdy = io.mem.xact_init_data.ready for (i <- 0 until n) { - io.requestor(i).req_data_rdy := req_data_rdy - req_data_val = req_data_val || io.requestor(i).req_data_val - req_data_rdy = req_data_rdy && !io.requestor(i).req_data_val + io.requestor(i).xact_init_data.ready := req_data_rdy + req_data_val = req_data_val || io.requestor(i).xact_init_data.valid + req_data_rdy = req_data_rdy && !io.requestor(i).xact_init_data.valid } - var req_rw = io.requestor(n-1).req_rw - var req_addr = io.requestor(n-1).req_addr - var req_tag = Cat(io.requestor(n-1).req_tag, UFix(n-1, log2up(n))) - for (i <- n-1 to 0 by -1) + var req_bits = Wire() { new TransactionInit } + req_bits := io.requestor(n-1).xact_init.bits + req_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2up(n))) + for (i <- n-2 to 0 by -1) { - req_rw = Mux(io.requestor(i).req_val, io.requestor(i).req_rw, req_rw) - req_addr = Mux(io.requestor(i).req_val, io.requestor(i).req_addr, req_addr) - req_tag = Mux(io.requestor(i).req_val, Cat(io.requestor(i).req_tag, UFix(i, log2up(n))), req_tag) + var my_req_bits = Wire() { new TransactionInit } + my_req_bits := io.requestor(i).xact_init.bits + my_req_bits.tile_xact_id := Cat(io.requestor(i).xact_init.bits.tile_xact_id, UFix(i, log2up(n))) + + req_bits = Mux(io.requestor(i).xact_init.valid, my_req_bits, req_bits) } - var req_data_bits = io.requestor(n-1).req_data_bits - for (i <- n-1 to 0 by -1) - req_data_bits = Mux(io.requestor(i).req_data_val, io.requestor(i).req_data_bits, req_data_bits) + var req_data_bits = io.requestor(n-1).xact_init_data.bits + for (i <- n-2 to 0 by -1) + req_data_bits = Mux(io.requestor(i).xact_init_data.valid, io.requestor(i).xact_init_data.bits, req_data_bits) - io.mem.req_val := req_val - io.mem.req_rw := req_rw - io.mem.req_addr := req_addr - io.mem.req_tag := req_tag + io.mem.xact_init.valid := req_val + io.mem.xact_init.bits := req_bits - io.mem.req_data_val := req_data_val - io.mem.req_data_bits := req_data_bits + io.mem.xact_init_data.valid := req_data_val + io.mem.xact_init_data.bits := req_data_bits for (i <- 0 until n) { - io.requestor(i).resp_val := io.mem.resp_val && io.mem.resp_tag(log2up(n)-1,0) === UFix(i) - io.requestor(i).resp_data := io.mem.resp_data - io.requestor(i).resp_tag := io.mem.resp_tag >> UFix(log2up(n)) + val tag = io.mem.xact_rep.bits.tile_xact_id + io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid && tag(log2up(n)-1,0) === UFix(i) + io.requestor(i).xact_rep.bits.data := io.mem.xact_rep.bits.data + io.requestor(i).xact_rep.bits.t_type := io.mem.xact_rep.bits.t_type + io.requestor(i).xact_rep.bits.has_data := io.mem.xact_rep.bits.has_data + io.requestor(i).xact_rep.bits.tile_xact_id := tag >> UFix(log2up(n)) + io.requestor(i).xact_rep.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id } + io.mem.xact_rep.ready := Bool(true) // XXX we shouldn't have xact_rep.ready } diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 75796a9d..9fbd69e9 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -10,22 +10,21 @@ class MemData extends Bundle { class MemReqCmd() extends Bundle { val rw = Bool() - val addr = UFix(PADDR_BITS - OFFSET_BITS) - val tag = Bits(MEM_TAG_BITS) + val addr = UFix(width = PADDR_BITS - OFFSET_BITS) + val tag = Bits(width = MEM_TAG_BITS) } class MemResp () extends Bundle { - val tag = Bits(MEM_TAG_BITS) + val tag = 
Bits(width = MEM_TAG_BITS) val data = Bits(width = MEM_DATA_BITS) - val valid = Bool() } -class ioMemHub() extends Bundle +class ioMem() extends Bundle { val req_cmd = (new ioDecoupled) { new MemReqCmd() }.flip val req_data = (new ioDecoupled) { new MemData() }.flip - val resp = new MemResp() + val resp = (new ioValid) { new MemResp() } } class HubMemReq extends Bundle { @@ -49,7 +48,7 @@ class TransactionInit extends Bundle { val t_type = Bits(width = TTYPE_BITS) val has_data = Bool() val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) - val address = Bits(width = PADDR_BITS) + val address = UFix(width = PADDR_BITS) } class TransactionInitData extends MemData @@ -348,8 +347,8 @@ abstract class CoherenceHub extends Component with CoherencePolicy class CoherenceHubNull extends Component { val io = new Bundle { - val tile = new ioTileLink() - val mem = new ioMemHub() + val tile = new ioTileLink().flip + val mem = new ioMem } val x_init = io.tile.xact_init @@ -362,11 +361,11 @@ class CoherenceHubNull extends Component { io.mem.req_data <> io.tile.xact_init_data val x_rep = io.tile.xact_rep - x_rep.bits.t_type := Mux(is_write, X_WRITE_UNCACHED, X_READ_EXCLUSIVE) - x_rep.bits.tile_xact_id := Mux(is_write, x_init.bits.tile_xact_id, io.mem.resp.tag) + x_rep.bits.t_type := Mux(io.mem.resp.valid, X_READ_EXCLUSIVE, X_WRITE_UNCACHED) + x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) x_rep.bits.global_xact_id := UFix(0) // don't care - x_rep.bits.data := io.mem.resp.data - x_rep.valid := io.mem.resp.valid || is_write + x_rep.bits.data := io.mem.resp.bits.data + x_rep.valid := io.mem.resp.valid || x_init.valid && is_write } @@ -388,7 +387,7 @@ class CoherenceHubNoDir extends CoherenceHub { val io = new Bundle { val tiles = Vec(NTILES) { new ioTileLink() } - val mem = new ioMemHub + val mem = new ioMem } val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) @@ -427,12 +426,12 @@ class CoherenceHubNoDir extends CoherenceHub { // Reply to initial requestor // Forward memory responses from mem to tile - val idx = io.mem.resp.tag + val idx = io.mem.resp.bits.tag for( j <- 0 until NTILES ) { io.tiles(j).xact_rep.bits.t_type := getTransactionReplyType(t_type_arr.read(idx), sh_count_arr.read(idx)) io.tiles(j).xact_rep.bits.tile_xact_id := tile_xact_id_arr.read(idx) io.tiles(j).xact_rep.bits.global_xact_id := idx - io.tiles(j).xact_rep.bits.data := io.mem.resp.data + io.tiles(j).xact_rep.bits.data := io.mem.resp.bits.data io.tiles(j).xact_rep.valid := (UFix(j) === init_tile_id_arr.read(idx)) && (io.mem.resp.valid || send_x_rep_ack_arr.read(idx)) } // If there were a ready signal due to e.g. 
intervening network use: diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 17294bae..6377d8c1 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -182,7 +182,7 @@ object Constants val NTILES = 1 val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8 val TILE_ID_BITS = 1 - val TILE_XACT_ID_BITS = 1 // log2(NMSHR) + val TILE_XACT_ID_BITS = log2up(NMSHR)+2 val GLOBAL_XACT_ID_BITS = 4 val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS @@ -201,7 +201,7 @@ object Constants val MEM_TAG_BITS = 4 val MEM_DATA_BITS = 128 val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS - require(MEM_TAG_BITS >= max(log2up(NMSHR)+1, GLOBAL_XACT_ID_BITS)) + require(MEM_TAG_BITS >= max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS)) val DTLB_ENTRIES = 8; val ITLB_ENTRIES = 8; diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 04e2bf5a..551f3d25 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -26,7 +26,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component val io = new Bundle { val host = new ioHost(w) val cpu = Vec(ncores) { new ioHTIF().flip() } - val mem = new ioMem + val mem = new ioTileLink } val short_request_bits = 64 @@ -93,11 +93,11 @@ class rocketHTIF(w: Int, ncores: Int) extends Component } val mem_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) - when (state === state_mem_req && io.mem.req_rdy) { + when (state === state_mem_req && io.mem.xact_init.ready) { state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) } - when (state === state_mem_wdata && io.mem.req_data_rdy || - state === state_mem_rdata && io.mem.resp_val) { + when (state === state_mem_wdata && io.mem.xact_init_data.ready || + state === state_mem_rdata && io.mem.xact_rep.valid) { when (mem_cnt.andR) { state := state_tx } @@ -112,16 +112,17 @@ class rocketHTIF(w: Int, ncores: Int) extends Component var mem_req_data: Bits = null for (i <- 0 until MEM_DATA_BITS/short_request_bits) { val idx = Cat(mem_cnt, UFix(i, log2up(MEM_DATA_BITS/short_request_bits))) - packet_ram.write(idx, io.mem.resp_data((i+1)*short_request_bits-1, i*short_request_bits), - state === state_mem_rdata && io.mem.resp_val) + packet_ram.write(idx, io.mem.xact_rep.bits.data((i+1)*short_request_bits-1, i*short_request_bits), + state === state_mem_rdata && io.mem.xact_rep.valid) mem_req_data = Cat(packet_ram.read(idx), mem_req_data) } - io.mem.req_val := state === state_mem_req - io.mem.req_rw := cmd === cmd_writemem - io.mem.req_addr := addr >> UFix(OFFSET_BITS-3) + io.mem.xact_init.valid := state === state_mem_req + io.mem.xact_init.bits.t_type := Mux(cmd === cmd_writemem, X_WRITE_UNCACHED, X_READ_UNCACHED) + io.mem.xact_init.bits.has_data := cmd === cmd_writemem + io.mem.xact_init.bits.address := addr >> UFix(OFFSET_BITS-3) - io.mem.req_data_val := state === state_mem_wdata - io.mem.req_data_bits := mem_req_data + io.mem.xact_init_data.valid:= state === state_mem_wdata + io.mem.xact_init_data.bits.data := mem_req_data pcr_done := Bool(false) val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 92bfa0f6..3d18f974 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -20,7 +20,7 @@ class ioImem(view: List[String] = null) extends Bundle (view) class ioRocketICache extends Bundle() { val cpu = new ioImem(); - val mem = new ioMem + val mem = new ioTileLink } // basic direct mapped instruction 
cache @@ -75,7 +75,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { // refill counter val refill_count = Reg(resetVal = UFix(0, rf_cnt_bits)); - when (io.mem.resp_val) { + when (io.mem.xact_rep.valid) { refill_count := refill_count + UFix(1); } @@ -84,7 +84,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { val tag_addr = Mux((state === s_refill_wait), r_cpu_req_idx(indexmsb,indexlsb), io.cpu.req_idx(indexmsb,indexlsb)).toUFix; - val tag_we = (state === s_refill_wait) && io.mem.resp_val; + val tag_we = (state === s_refill_wait) && io.mem.xact_rep.valid; val data_addr = Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; @@ -112,10 +112,10 @@ class rocketICache(sets: Int, assoc: Int) extends Component { val hit = valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) // data array - val data_array = Mem(sets*REFILL_CYCLES){ io.mem.resp_data } + val data_array = Mem(sets*REFILL_CYCLES){ io.mem.xact_rep.bits.data } data_array.setReadLatency(1); data_array.setTarget('inst); - val data_out = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val && repl_me) + val data_out = data_array.rw(data_addr, io.mem.xact_rep.bits.data, io.mem.xact_rep.valid && repl_me) data_mux.io.sel(i) := hit data_mux.io.in(i) := (data_out >> word_shift)(databits-1,0); @@ -128,10 +128,11 @@ class rocketICache(sets: Int, assoc: Int) extends Component { io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_hit; rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); io.cpu.resp_data := data_mux.io.out - io.mem.req_val := (state === s_request); - io.mem.req_rw := Bool(false) - io.mem.req_addr := r_cpu_miss_addr(tagmsb,indexlsb).toUFix - io.mem.req_data_val := Bool(false) + io.mem.xact_init.valid := (state === s_request) + io.mem.xact_init.bits.t_type := X_READ_UNCACHED + io.mem.xact_init.bits.has_data := Bool(false) + io.mem.xact_init.bits.address := r_cpu_miss_addr(tagmsb,indexlsb).toUFix + io.mem.xact_init_data.valid := Bool(false) // control state machine switch (state) { @@ -148,19 +149,19 @@ class rocketICache(sets: Int, assoc: Int) extends Component { } is (s_request) { - when (io.mem.req_rdy) { + when (io.mem.xact_init.ready) { state := s_refill_wait; } } is (s_refill_wait) { - when (io.mem.resp_val) { + when (io.mem.xact_rep.valid) { state := s_refill; } } is (s_refill) { - when (io.mem.resp_val && (~refill_count === UFix(0))) { + when (io.mem.xact_rep.valid && refill_count.andR) { state := s_ready; } } - } + } } diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index d47914ec..ba666cd9 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -6,8 +6,8 @@ import Constants._; import scala.math._; class ioIPrefetcher extends Bundle() { - val icache = new ioMem().flip - val mem = new ioMem + val icache = new ioTileLink().flip + val mem = new ioTileLink val invalidate = Bool(INPUT) } @@ -18,22 +18,23 @@ class rocketIPrefetcher extends Component() { val s_invalid :: s_valid :: s_refilling :: s_req_wait :: s_resp_wait :: s_bad_resp_wait :: Nil = Enum(6) { UFix() }; val state = Reg(resetVal = s_invalid); - val demand_miss = io.icache.req_val & io.icache.req_rdy; - val prefetch_addr = Reg() { UFix(width = io.icache.req_addr.width) }; - when (demand_miss) { prefetch_addr := io.icache.req_addr + UFix(1); } - - val 
addr_match = (prefetch_addr === io.icache.req_addr); + val demand_miss = io.icache.xact_init.valid && io.icache.xact_init.ready + val prefetch_addr = Reg() { UFix(width = io.icache.xact_init.bits.address.width) }; + val addr_match = (prefetch_addr === io.icache.xact_init.bits.address); val hit = (state != s_invalid) & (state != s_req_wait) & addr_match; + val prefetch_miss = io.icache.xact_init.valid && !hit + when (demand_miss) { prefetch_addr := io.icache.xact_init.bits.address + UFix(1); } - io.icache.req_rdy := io.mem.req_rdy; - val ip_mem_req_rdy = io.mem.req_rdy & ~(io.icache.req_val & ~hit); - val ip_mem_resp_val = io.mem.resp_val && io.mem.resp_tag(0).toBool; + io.icache.xact_init.ready := io.mem.xact_init.ready + val ip_mem_req_rdy = io.mem.xact_init.ready && !prefetch_miss + val ip_mem_resp_val = io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id(0) - io.mem.req_val := io.icache.req_val & ~hit | (state === s_req_wait); - io.mem.req_rw := Bool(false) - io.mem.req_tag := Mux(io.icache.req_val && !hit, UFix(0), UFix(1)) - io.mem.req_addr := Mux(io.mem.req_tag(0).toBool, prefetch_addr, io.icache.req_addr); - io.mem.req_data_val := Bool(false) + io.mem.xact_init.valid := prefetch_miss || (state === s_req_wait) + io.mem.xact_init.bits.t_type := X_READ_UNCACHED + io.mem.xact_init.bits.has_data := Bool(false) + io.mem.xact_init.bits.tile_xact_id := Mux(prefetch_miss, UFix(0), UFix(1)) + io.mem.xact_init.bits.address := Mux(prefetch_miss, io.icache.xact_init.bits.address, prefetch_addr); + io.mem.xact_init_data.valid := Bool(false) val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); when (ip_mem_resp_val.toBool) { fill_cnt := fill_cnt + UFix(1); } @@ -45,11 +46,11 @@ class rocketIPrefetcher extends Component() { val forward_done = (~forward_cnt === UFix(0)) & pdq.io.deq.valid; forward := (demand_miss & hit | forward & ~forward_done); - io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag(0).toBool) || (forward && pdq.io.deq.valid); - io.icache.resp_data := Mux(forward, pdq.io.deq.bits, io.mem.resp_data); + io.icache.xact_rep.valid := io.mem.xact_rep.valid && !io.mem.xact_rep.bits.tile_xact_id(0) || (forward && pdq.io.deq.valid) + io.icache.xact_rep.bits.data := Mux(forward, pdq.io.deq.bits, io.mem.xact_rep.bits.data) pdq.io.flush := Reg(demand_miss && !hit || (state === s_bad_resp_wait), resetVal = Bool(false)) - pdq.io.enq.bits := io.mem.resp_data; + pdq.io.enq.bits := io.mem.xact_rep.bits.data pdq.io.enq.valid := ip_mem_resp_val.toBool; pdq.io.deq.ready := forward; diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 6317a764..50dd9fc3 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -133,12 +133,6 @@ class DataArrayArrayReq extends Bundle { val way_en = Bits(width = NWAYS) } -class MemReq extends Bundle { - val rw = Bool() - val addr = UFix(width = PADDR_BITS-OFFSET_BITS) - val tag = Bits(width = MEM_TAG_BITS) -} - class WritebackReq extends Bundle { val ppn = Bits(width = TAG_BITS) val idx = Bits(width = IDX_BITS) @@ -182,7 +176,7 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { val way_oh = Bits(NWAYS, OUTPUT) val mem_resp_val = Bool(INPUT) - val mem_req = (new ioDecoupled) { new MemReq() }.flip + val mem_req = (new ioDecoupled) { new TransactionInit }.flip val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip val replay = (new ioDecoupled) { new Replay() }.flip } @@ -257,10 +251,10 @@ class MSHR(id: Int) extends Component 
with ThreeStateIncoherence { io.meta_req.bits.way_en := way_oh_ io.mem_req.valid := valid && !requested - //io.mem_req.bits.itm := next_dirty - io.mem_req.bits.rw := Bool(false) - io.mem_req.bits.addr := Cat(ppn, idx_).toUFix - io.mem_req.bits.tag := Bits(id) + io.mem_req.bits.t_type := Mux(needsWriteback(next_state), X_READ_EXCLUSIVE, X_READ_SHARED) + io.mem_req.bits.has_data := Bool(false) + io.mem_req.bits.address := Cat(ppn, idx_).toUFix + io.mem_req.bits.tile_xact_id := Bits(id) io.replay.valid := rpq.io.deq.valid && refilled io.replay.bits.idx := idx_ @@ -287,7 +281,7 @@ class MSHRFile extends Component { val fence_rdy = Bool(OUTPUT) - val mem_req = (new ioDecoupled) { new MemReq() }.flip() + val mem_req = (new ioDecoupled) { new TransactionInit }.flip() val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip() val replay = (new ioDecoupled) { new Replay() }.flip() } @@ -296,7 +290,7 @@ class MSHRFile extends Component { val mem_resp_idx_mux = (new Mux1H(NMSHR)){ Bits(width = IDX_BITS) } val mem_resp_way_oh_mux = (new Mux1H(NMSHR)){ Bits(width = NWAYS) } val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } - val mem_req_arb = (new Arbiter(NMSHR)) { new MemReq() } + val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } val replay_arb = (new Arbiter(NMSHR)) { new Replay() } val alloc_arb = (new Arbiter(NMSHR)) { Bool() } @@ -421,9 +415,9 @@ class WritebackUnit extends Component { val req = (new ioDecoupled) { new WritebackReq() } val data_req = (new ioDecoupled) { new DataArrayArrayReq() }.flip() val data_resp = Bits(MEM_DATA_BITS, INPUT) - val refill_req = (new ioDecoupled) { new MemReq() } - val mem_req = (new ioDecoupled) { new MemReq() }.flip() - val mem_req_data = (new ioDecoupled) { Bits(width = MEM_DATA_BITS) }.flip() + val refill_req = (new ioDecoupled) { new TransactionInit } + val mem_req = (new ioDecoupled) { new TransactionInit }.flip + val mem_req_data = (new ioDecoupled) { new TransactionInitData }.flip } val valid = Reg(resetVal = Bool(false)) @@ -449,12 +443,13 @@ class WritebackUnit extends Component { val wb_req_val = io.req.valid && !valid io.refill_req.ready := io.mem_req.ready && !wb_req_val io.mem_req.valid := io.refill_req.valid || wb_req_val - io.mem_req.bits.rw := wb_req_val - io.mem_req.bits.addr := Mux(wb_req_val, Cat(io.req.bits.ppn, io.req.bits.idx).toUFix, io.refill_req.bits.addr) - io.mem_req.bits.tag := io.refill_req.bits.tag + io.mem_req.bits.t_type := Mux(wb_req_val, X_WRITE_UNCACHED, io.refill_req.bits.t_type) + io.mem_req.bits.has_data := wb_req_val + io.mem_req.bits.address := Mux(wb_req_val, Cat(io.req.bits.ppn, io.req.bits.idx).toUFix, io.refill_req.bits.address) + io.mem_req.bits.tile_xact_id := Mux(wb_req_val, Bits(NMSHR), io.refill_req.bits.tile_xact_id) io.mem_req_data.valid := data_req_fired - io.mem_req_data.bits := io.data_resp + io.mem_req_data.bits.data := io.data_resp } class FlushUnit(lines: Int) extends Component with ThreeStateIncoherence{ @@ -680,7 +675,7 @@ abstract class HellaCache extends Component { class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val io = new Bundle { val cpu = new ioDmem() - val mem = new ioMem + val mem = new ioTileLink } val lines = 1 << IDX_BITS @@ -749,9 +744,11 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) // refill counter + val mem_resp_type = io.mem.xact_rep.bits.t_type + val refill_val = io.mem.xact_rep.valid && (mem_resp_type === X_READ_SHARED 
|| mem_resp_type === X_READ_EXCLUSIVE) val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val rr_count_next = rr_count + UFix(1) - when (io.mem.resp_val) { rr_count := rr_count_next } + when (refill_val) { rr_count := rr_count_next } val misaligned = (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || @@ -806,19 +803,19 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val needs_writeback = needsWriteback(meta_wb_mux.state) // refill response - val block_during_refill = !io.mem.resp_val && (rr_count != UFix(0)) + val block_during_refill = !refill_val && (rr_count != UFix(0)) data_arb.io.in(0).bits.inner_req.offset := rr_count data_arb.io.in(0).bits.inner_req.rw := !block_during_refill data_arb.io.in(0).bits.inner_req.wmask := ~UFix(0, MEM_DATA_BITS/8) - data_arb.io.in(0).bits.inner_req.data := io.mem.resp_data - data_arb.io.in(0).valid := io.mem.resp_val || block_during_refill + data_arb.io.in(0).bits.inner_req.data := io.mem.xact_rep.bits.data + data_arb.io.in(0).valid := refill_val || block_during_refill // load hits data_arb.io.in(4).bits.inner_req.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) data_arb.io.in(4).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) data_arb.io.in(4).bits.inner_req.rw := Bool(false) data_arb.io.in(4).bits.inner_req.wmask := UFix(0) // don't care - data_arb.io.in(4).bits.inner_req.data := io.mem.resp_data // don't care + data_arb.io.in(4).bits.inner_req.data := io.mem.xact_rep.bits.data // don't care data_arb.io.in(4).valid := io.cpu.req_val && req_read data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check val early_load_nack = req_read && !data_arb.io.in(4).ready @@ -884,8 +881,8 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { mshr.io.req_type := r_cpu_req_type mshr.io.req_sdq_id := replayer.io.sdq_id mshr.io.req_way_oh := replaced_way_oh - mshr.io.mem_resp_val := io.mem.resp_val && (~rr_count === UFix(0)) - mshr.io.mem_resp_tag := io.mem.resp_tag + mshr.io.mem_resp_val := refill_val && (~rr_count === UFix(0)) + mshr.io.mem_resp_tag := io.mem.xact_rep.bits.tile_xact_id mshr.io.mem_req <> wb.io.refill_req mshr.io.meta_req <> meta_arb.io.in(1) mshr.io.replay <> replayer.io.replay @@ -968,14 +965,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { io.cpu.resp_type := loadgen.io.typ io.cpu.resp_data := loadgen.io.dout io.cpu.resp_data_subword := loadgen.io.r_dout_subword - - wb.io.mem_req.ready := io.mem.req_rdy - io.mem.req_val := wb.io.mem_req.valid - io.mem.req_rw := wb.io.mem_req.bits.rw - io.mem.req_tag := wb.io.mem_req.bits.tag.toUFix - io.mem.req_addr := wb.io.mem_req.bits.addr - - io.mem.req_data_val := wb.io.mem_req_data.valid - wb.io.mem_req_data.ready := io.mem.req_data_rdy - io.mem.req_data_bits := wb.io.mem_req_data.bits + + io.mem.xact_init <> wb.io.mem_req + io.mem.xact_init_data <> wb.io.mem_req_data } diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index fb1dd542..14f416ba 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -10,7 +10,7 @@ class ioQueue[T <: Data](flushable: Boolean)(data: => T) extends Bundle val deq = new ioDecoupled()(data).flip } -class queue[T <: Data](entries: Int, flushable: Boolean = false)(data: => T) extends Component +class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = false)(data: => T) extends Component { val io = new 
ioQueue(flushable)(data) @@ -50,6 +50,6 @@ class queue[T <: Data](entries: Int, flushable: Boolean = false)(data: => T) ext } io.deq.valid := maybe_full || enq_ptr != deq_ptr - io.enq.ready := !maybe_full || enq_ptr != deq_ptr + io.enq.ready := !maybe_full || enq_ptr != deq_ptr || (if (pipe) io.deq.ready else Bool(false)) io.deq.bits <> Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr) } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 3f3fd1d2..7a472a94 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -7,7 +7,7 @@ import Constants._; class ioTop(htif_width: Int) extends Bundle { val debug = new ioDebug(); val host = new ioHost(htif_width); - val mem = new ioMem(); + val mem = new ioMem } class Top() extends Component { @@ -21,23 +21,39 @@ class Top() extends Component { val icache_pf = new rocketIPrefetcher(); val dcache = new HellaCacheUniproc(); - val arbiter = new rocketMemArbiter(4); + val arbiter = new rocketMemArbiter(3 + (if (HAVE_VEC) 1 else 0)); arbiter.io.requestor(0) <> dcache.io.mem arbiter.io.requestor(1) <> icache_pf.io.mem - arbiter.io.requestor(3) <> htif.io.mem - arbiter.io.mem <> io.mem + arbiter.io.requestor(2) <> htif.io.mem + + val hub = new CoherenceHubNull + // connect tile to hub (figure out how to do this more compactly) + val xact_init_q = (new queue(2)) { new TransactionInit } + xact_init_q.io.enq <> arbiter.io.mem.xact_init + xact_init_q.io.deq <> hub.io.tile.xact_init + val xact_init_data_q = (new queue(2)) { new TransactionInitData } + xact_init_data_q.io.enq <> arbiter.io.mem.xact_init_data + xact_init_data_q.io.deq <> hub.io.tile.xact_init_data + val xact_rep_q = (new queue(1, pipe = true)) { new TransactionReply } + xact_rep_q.io.enq <> hub.io.tile.xact_rep + xact_rep_q.io.deq <> arbiter.io.mem.xact_rep + // connect hub to memory + val mem_req_q = (new queue(2)) { new MemReqCmd } + mem_req_q.io.enq <> hub.io.mem.req_cmd + mem_req_q.io.deq <> io.mem.req_cmd + val mem_req_data_q = (new queue(2)) { new MemData } + mem_req_data_q.io.enq <> hub.io.mem.req_data + mem_req_data_q.io.deq <> io.mem.req_data + hub.io.mem.resp.valid := Reg(io.mem.resp.valid, resetVal = Bool(false)) + hub.io.mem.resp.bits := Reg(io.mem.resp.bits) + if (HAVE_VEC) { val vicache = new rocketICache(128, 2); // 128 sets x 2 ways - arbiter.io.requestor(2) <> vicache.io.mem + arbiter.io.requestor(3) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu; } - else - { - arbiter.io.requestor(2).req_val := Bool(false) - arbiter.io.requestor(2).req_data_val := Bool(false) - } htif.io.host <> io.host cpu.io.host <> htif.io.cpu(0); diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 5ac3b41b..47361d2e 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -180,6 +180,12 @@ class ioDecoupled[T <: Data]()(data: => T) extends Bundle val bits = data.asInput } +class ioValid[T <: Data]()(data: => T) extends Bundle +{ + val valid = Bool(INPUT) + val bits = data.asInput +} + class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle { val in = Vec(n) { (new ioDecoupled()) { data } } val out = (new ioDecoupled()) { data }.flip()
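A note for readers: ioTileLink is used throughout this patch but its definition is not part of the diff. The sketch below is an editorial reconstruction, inferred only from the fields exercised above; the real bundles in coherence.scala may differ and may include channels this patch does not touch. Directions are written from the tile's point of view, which is why rocketHTIF instantiates ioTileLink directly while CoherenceHubNull takes it flipped. xact_rep is shown as a full ioDecoupled because the arbiter above still drives xact_rep.ready (see the XXX note there).

class TransactionReply extends Bundle {
  val t_type         = Bits(width = TTYPE_BITS)
  val has_data       = Bool()
  val tile_xact_id   = Bits(width = TILE_XACT_ID_BITS)
  val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS)
  val data           = Bits(width = MEM_DATA_BITS)
}

class ioTileLink extends Bundle {
  val xact_init      = (new ioDecoupled) { new TransactionInit }.flip      // tile -> hub
  val xact_init_data = (new ioDecoupled) { new TransactionInitData }.flip  // tile -> hub
  val xact_rep       = (new ioDecoupled) { new TransactionReply }          // hub -> tile
}

On the top-level wiring: the xact_rep queue is a single-entry queue built with the new pipe option, so it can accept a fresh reply from the hub in the same cycle the arbiter drains the previous one, while the two-entry xact_init queues decouple the tile from the hub without that combinational ready path.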