diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 50f66a58..c9b442f1 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -10,8 +10,11 @@ class ioMem() extends Bundle val req_rdy = Bool(INPUT); val req_rw = Bool(OUTPUT); val req_addr = UFix(PADDR_BITS - OFFSET_BITS, OUTPUT); - val req_wdata = Bits(MEM_DATA_BITS, OUTPUT); val req_tag = Bits(MEM_TAG_BITS, OUTPUT); + + val req_data_val = Bool(OUTPUT); + val req_data_rdy = Bool(INPUT); + val req_data_bits = Bits(MEM_DATA_BITS, OUTPUT); val resp_val = Bool(INPUT); val resp_tag = Bits(MEM_TAG_BITS, INPUT); @@ -35,24 +38,37 @@ class rocketMemArbiter(n: Int) extends Component { req_rdy = req_rdy && !io.requestor(i).req_val } + var req_data_val = Bool(false) + var req_data_rdy = io.mem.req_data_rdy + for (i <- 0 until n) + { + io.requestor(i).req_data_rdy := req_data_rdy + req_data_val = req_data_val || io.requestor(i).req_data_val + req_data_rdy = req_data_rdy && !io.requestor(i).req_data_val + } + var req_rw = io.requestor(n-1).req_rw var req_addr = io.requestor(n-1).req_addr - var req_wdata = io.requestor(n-1).req_wdata var req_tag = Cat(io.requestor(n-1).req_tag, UFix(n-1, log2up(n))) for (i <- n-1 to 0 by -1) { req_rw = Mux(io.requestor(i).req_val, io.requestor(i).req_rw, req_rw) req_addr = Mux(io.requestor(i).req_val, io.requestor(i).req_addr, req_addr) - req_wdata = Mux(io.requestor(i).req_val, io.requestor(i).req_wdata, req_wdata) req_tag = Mux(io.requestor(i).req_val, Cat(io.requestor(i).req_tag, UFix(i, log2up(n))), req_tag) } + var req_data_bits = io.requestor(n-1).req_data_bits + for (i <- n-1 to 0 by -1) + req_data_bits = Mux(io.requestor(i).req_data_val, io.requestor(i).req_data_bits, req_data_bits) + io.mem.req_val := req_val io.mem.req_rw := req_rw io.mem.req_addr := req_addr - io.mem.req_wdata := req_wdata io.mem.req_tag := req_tag + io.mem.req_data_val := req_data_val + io.mem.req_data_bits := req_data_bits + for (i <- 0 until n) { io.requestor(i).resp_val := io.mem.resp_val && io.mem.resp_tag(log2up(n)-1,0) === UFix(i) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 11c764ec..75c430a3 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -424,12 +424,6 @@ class CoherenceHubNoDir extends CoherenceHub { //mem_req_arb.io.out.ready := io.mem.req_cmd.ready || io.mem.req_data.ready io.mem.req_cmd <> mem_req_arb.io.out.bits.req_cmd io.mem.req_data <> mem_req_arb.io.out.bits.req_data - //io.mem.req_wdata := MuxLookup(mem_req_arb.io.out.bits.data_idx, - // Bits(0, width = MEM_DATA_BITS), - // (0 until NTILES).map( j => - // UFix(j) -> Mux(mem_req_arb.io.out.bits.is_probe_rep, - // io.tiles(j).probe_rep_data.bits.data, - // io.tiles(j).xact_init_data.bits.data))) // Handle probe replies, which may or may not have data for( j <- 0 until NTILES ) { diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 77d189eb..04e2bf5a 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -78,7 +78,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component Mux(!nack && cmd === cmd_readcr, UFix(1), UFix(0))) val tx_done = packet_ram_raddr - UFix(1) === tx_size - val state_rx :: state_pcr :: state_mem_req :: state_mem_resp :: state_tx :: Nil = Enum(5) { UFix() } + val state_rx :: state_pcr :: state_mem_req :: state_mem_wdata :: state_mem_rdata :: state_tx :: Nil = Enum(6) { UFix() } val state = Reg(resetVal = state_rx) when (state === state_rx && rx_done) { @@ -94,18 +94,11 @@ class rocketHTIF(w: Int, ncores: Int) extends Component val mem_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) when (state === state_mem_req && io.mem.req_rdy) { - when (cmd === cmd_writemem) { - when (mem_cnt.andR) { - state := state_tx - } - mem_cnt := mem_cnt + UFix(1) - } - .otherwise { - state := state_mem_resp - } + state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) } - when (state === state_mem_resp && io.mem.resp_val) { - when (mem_cnt.andR) { + when (state === state_mem_wdata && io.mem.req_data_rdy || + state === state_mem_rdata && io.mem.resp_val) { + when (mem_cnt.andR) { state := state_tx } mem_cnt := mem_cnt + UFix(1) @@ -120,13 +113,15 @@ class rocketHTIF(w: Int, ncores: Int) extends Component for (i <- 0 until MEM_DATA_BITS/short_request_bits) { val idx = Cat(mem_cnt, UFix(i, log2up(MEM_DATA_BITS/short_request_bits))) packet_ram.write(idx, io.mem.resp_data((i+1)*short_request_bits-1, i*short_request_bits), - state === state_mem_resp && io.mem.resp_val) + state === state_mem_rdata && io.mem.resp_val) mem_req_data = Cat(packet_ram.read(idx), mem_req_data) } io.mem.req_val := state === state_mem_req io.mem.req_rw := cmd === cmd_writemem io.mem.req_addr := addr >> UFix(OFFSET_BITS-3) - io.mem.req_wdata := mem_req_data + + io.mem.req_data_val := state === state_mem_wdata + io.mem.req_data_bits := mem_req_data pcr_done := Bool(false) val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 09cbb156..92bfa0f6 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -131,6 +131,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { io.mem.req_val := (state === s_request); io.mem.req_rw := Bool(false) io.mem.req_addr := r_cpu_miss_addr(tagmsb,indexlsb).toUFix + io.mem.req_data_val := Bool(false) // control state machine switch (state) { diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 104257e0..d47914ec 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -33,6 +33,7 @@ class rocketIPrefetcher extends Component() { io.mem.req_rw := Bool(false) io.mem.req_tag := Mux(io.icache.req_val && !hit, UFix(0), UFix(1)) io.mem.req_addr := Mux(io.mem.req_tag(0).toBool, prefetch_addr, io.icache.req_addr); + io.mem.req_data_val := Bool(false) val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); when (ip_mem_resp_val.toBool) { fill_cnt := fill_cnt + UFix(1); } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 89a4e5c6..6317a764 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -423,29 +423,21 @@ class WritebackUnit extends Component { val data_resp = Bits(MEM_DATA_BITS, INPUT) val refill_req = (new ioDecoupled) { new MemReq() } val mem_req = (new ioDecoupled) { new MemReq() }.flip() - val mem_req_data = Bits(MEM_DATA_BITS, OUTPUT) + val mem_req_data = (new ioDecoupled) { Bits(width = MEM_DATA_BITS) }.flip() } - val wbq = (new queue(REFILL_CYCLES)) { Bits(width = MEM_DATA_BITS) } val valid = Reg(resetVal = Bool(false)) + val data_req_fired = Reg(resetVal = Bool(false)) val cnt = Reg() { UFix(width = log2up(REFILL_CYCLES+1)) } val addr = Reg() { new WritebackReq() } - // don't allow memory requests to bypass conflicting writebacks. - // also don't allow a refill request once a writeback has started. - // TODO: turn this into a victim buffer. - val block_refill = valid && ((io.refill_req.bits.addr(IDX_BITS-1,0) === addr.idx) || (cnt === UFix(REFILL_CYCLES))) - val refill_val = io.refill_req.valid && !block_refill - - wbq.io.enq.valid := valid && Reg(io.data_req.valid && io.data_req.ready) - wbq.io.enq.bits := io.data_resp - wbq.io.deq.ready := io.mem_req.ready && !refill_val && (cnt === UFix(REFILL_CYCLES)) - - when (io.data_req.valid && io.data_req.ready) { cnt := cnt + UFix(1) } - when ((cnt === UFix(REFILL_CYCLES)) && !wbq.io.deq.valid) { valid := Bool(false) } + data_req_fired := Bool(false) + when (io.data_req.valid && io.data_req.ready) { data_req_fired := Bool(true); cnt := cnt + UFix(1) } + when (data_req_fired && !io.mem_req_data.ready) { data_req_fired := Bool(false); cnt := cnt - UFix(1) } + when ((cnt === UFix(REFILL_CYCLES)) && io.mem_req_data.ready) { valid := Bool(false) } when (io.req.valid && io.req.ready) { valid := Bool(true); cnt := UFix(0); addr := io.req.bits } - io.req.ready := !valid + io.req.ready := !valid && io.mem_req.ready io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) io.data_req.bits.way_en := addr.way_oh io.data_req.bits.inner_req.idx := addr.idx @@ -454,12 +446,15 @@ class WritebackUnit extends Component { io.data_req.bits.inner_req.wmask := Bits(0) io.data_req.bits.inner_req.data := Bits(0) - io.refill_req.ready := io.mem_req.ready && !block_refill - io.mem_req.valid := refill_val || wbq.io.deq.valid && (cnt === UFix(REFILL_CYCLES)) - io.mem_req.bits.rw := !refill_val - io.mem_req.bits.addr := Mux(refill_val, io.refill_req.bits.addr, Cat(addr.ppn, addr.idx).toUFix) + val wb_req_val = io.req.valid && !valid + io.refill_req.ready := io.mem_req.ready && !wb_req_val + io.mem_req.valid := io.refill_req.valid || wb_req_val + io.mem_req.bits.rw := wb_req_val + io.mem_req.bits.addr := Mux(wb_req_val, Cat(io.req.bits.ppn, io.req.bits.idx).toUFix, io.refill_req.bits.addr) io.mem_req.bits.tag := io.refill_req.bits.tag - io.mem_req_data := wbq.io.deq.bits + + io.mem_req_data.valid := data_req_fired + io.mem_req_data.bits := io.data_resp } class FlushUnit(lines: Int) extends Component with ThreeStateIncoherence{ @@ -977,7 +972,10 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { wb.io.mem_req.ready := io.mem.req_rdy io.mem.req_val := wb.io.mem_req.valid io.mem.req_rw := wb.io.mem_req.bits.rw - io.mem.req_wdata := wb.io.mem_req_data io.mem.req_tag := wb.io.mem_req.bits.tag.toUFix io.mem.req_addr := wb.io.mem_req.bits.addr + + io.mem.req_data_val := wb.io.mem_req_data.valid + wb.io.mem_req_data.ready := io.mem.req_data_rdy + io.mem.req_data_bits := wb.io.mem_req_data.bits } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index f379c017..3f3fd1d2 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -34,7 +34,10 @@ class Top() extends Component { cpu.io.vimem <> vicache.io.cpu; } else + { arbiter.io.requestor(2).req_val := Bool(false) + arbiter.io.requestor(2).req_data_val := Bool(false) + } htif.io.host <> io.host cpu.io.host <> htif.io.cpu(0);