diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala
index 7e2ca8b4..eef8ea6f 100644
--- a/rocket/src/main/scala/arbiter.scala
+++ b/rocket/src/main/scala/arbiter.scala
@@ -10,15 +10,15 @@ class ioMem() extends Bundle
   val req_rdy = Bool('input);
   val req_rw = Bool('output);
   val req_addr = UFix(PADDR_BITS, 'output);
-  val req_wdata = Bits(128, 'output);
-  val req_tag = Bits(4, 'output);
+  val req_wdata = Bits(MEM_DATA_BITS, 'output);
+  val req_tag = Bits(MEM_TAG_BITS, 'output);
 
   val resp_val = Bool('input);
-  val resp_tag = Bits(4, 'input);
-  val resp_data = Bits(128, 'input);
+  val resp_tag = Bits(MEM_TAG_BITS, 'input);
+  val resp_data = Bits(MEM_DATA_BITS, 'input);
 }
 
-class ioArbiter extends Bundle() {
+class ioMemArbiter extends Bundle() {
   val mem = new ioMem();
   val dcache = new ioDcache();
 // val icache = new ioIcache();
@@ -26,7 +26,7 @@ class ioArbiter extends Bundle() {
 }
 
 class rocketMemArbiter extends Component {
-  val io = new ioArbiter();
+  val io = new ioMemArbiter();
 
   // *****************************
   // Interface to memory
@@ -41,11 +41,8 @@ class rocketMemArbiter extends Component {
   // Give priority to Icache
   io.mem.req_addr := Mux(io.icache.req_val,io.icache.req_addr,io.dcache.req_addr);
 
-  // high bit of tag=0 for I$, tag=0 for D$
-// io.mem.req_tag := Mux(io.icache.req_val,Bits(0,4),Bits(1,4));
-  io.mem.req_tag := Mux(io.icache.req_val,
-                        Cat(Bits(0,1), io.icache.req_tag),
-                        Cat(Bits(1,1), io.dcache.req_tag));
+  // low bit of tag=0 for I$, 1 for D$
+  io.mem.req_tag := Cat(Mux(io.icache.req_val, io.icache.req_tag, io.dcache.req_tag), !io.icache.req_val)
 
   // Just pass through write data (only D$ will write)
   io.mem.req_wdata := io.dcache.req_wdata;
@@ -59,15 +56,15 @@ class rocketMemArbiter extends Component {
   io.dcache.req_rdy := io.mem.req_rdy && !io.icache.req_val;
 
   // Response will only be valid for D$ or I$ not both because of tag bits
-  io.icache.resp_val := io.mem.resp_val && !io.mem.resp_tag(3).toBool;
-  io.dcache.resp_val := io.mem.resp_val && io.mem.resp_tag(3).toBool;
+  io.icache.resp_val := io.mem.resp_val && !io.mem.resp_tag(0).toBool;
+  io.dcache.resp_val := io.mem.resp_val && io.mem.resp_tag(0).toBool;
 
   // Feed through data to both
   io.icache.resp_data := io.mem.resp_data;
   io.dcache.resp_data := io.mem.resp_data;
 
-  io.icache.resp_tag := io.mem.resp_tag(2,0);
-// io.dcache.resp_tag := io.mem.resp_tag(2,0);
+  io.icache.resp_tag := io.mem.resp_tag >> UFix(1)
+  io.dcache.resp_tag := io.mem.resp_tag >> UFix(1)
 
 }
 
diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala
index d93eac51..1c4c2996 100644
--- a/rocket/src/main/scala/consts.scala
+++ b/rocket/src/main/scala/consts.scala
@@ -1,6 +1,7 @@
 package Top {
 
 import Chisel._
+import scala.math._
 
 object Constants
 {
@@ -125,10 +126,12 @@ object Constants
   val M_X = UFix(0, 4);
   val M_XRD = Bits("b0000", 4); // int load
   val M_XWR = Bits("b0001", 4); // int store
-  val M_FRD = Bits("b0010", 4); // fp load
-  val M_FWR = Bits("b0011", 4); // fp store
-  val M_FLA = Bits("b0100", 4); // flush cache
+  val M_PFR = Bits("b0010", 4); // prefetch with intent to read
+  val M_PFW = Bits("b0011", 4); // prefetch with intent to write
+  val M_FLA = Bits("b0100", 4); // write back and invalidate all lines
   val M_PRD = Bits("b0101", 4); // PTW load
+  val M_INV = Bits("b0110", 4); // write back and invalidate line
+  val M_CLN = Bits("b0111", 4); // write back line
   val M_XA_ADD = Bits("b1000", 4);
   val M_XA_SWAP = Bits("b1001", 4);
   val M_XA_AND = Bits("b1010", 4);
@@ -183,6 +186,23 @@ object Constants
   val VPN_BITS = VADDR_BITS-PGIDX_BITS;
   val ASID_BITS = 7;
   val PERM_BITS = 6;
+
+  // rocketNBDCacheDM parameters
+  val CPU_DATA_BITS = 64;
+  val CPU_TAG_BITS = 5;
+  val OFFSET_BITS = 6; // log2(cache line size in bytes)
+  val NMSHR = 2; // number of primary misses
+  val NRPQ = 16; // number of secondary misses
+  val NSDQ = 10; // number of secondary stores/AMOs
+  val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes)
+  val IDX_BITS = PGIDX_BITS - OFFSET_BITS;
+
+  // external memory interface
+  val IMEM_TAG_BITS = 1;
+  val DMEM_TAG_BITS = ceil(log(NMSHR)/log(2)).toInt;
+  val MEM_TAG_BITS = 1 + max(IMEM_TAG_BITS, DMEM_TAG_BITS);
+  val MEM_DATA_BITS = 128;
+  val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS;
 
   val DTLB_ENTRIES = 8;
   val ITLB_ENTRIES = 8;
diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala
index 0c6849a2..23d8352a 100644
--- a/rocket/src/main/scala/dcache.scala
+++ b/rocket/src/main/scala/dcache.scala
@@ -27,13 +27,13 @@ class ioDmem(view: List[String] = null) extends Bundle(view) {
 // interface between D$ and next level in memory hierarchy
 class ioDcache(view: List[String] = null) extends Bundle(view) {
   val req_addr = UFix(PADDR_BITS, 'input);
-  val req_tag = UFix(3, 'input);
+  val req_tag = UFix(DMEM_TAG_BITS, 'input);
   val req_val = Bool('input);
   val req_rdy = Bool('output);
   val req_wdata = Bits(128, 'input);
   val req_rw = Bool('input);
   val resp_data = Bits(128, 'output);
-// val resp_tag = Bits(3, 'output);
+  val resp_tag = Bits(DMEM_TAG_BITS, 'output);
   val resp_val = Bool('output);
 }
 
diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala
index c82dda68..e65591be 100644
--- a/rocket/src/main/scala/icache.scala
+++ b/rocket/src/main/scala/icache.scala
@@ -24,7 +24,7 @@ class ioIcache(view: List[String] = null) extends Bundle (view)
   val req_addr = UFix(PADDR_BITS, 'input);
   val req_val = Bool('input);
   val req_rdy = Bool('output);
-  val resp_data = Bits(128, 'output);
+  val resp_data = Bits(MEM_DATA_BITS, 'output);
   val resp_val = Bool('output);
 }
 
@@ -50,8 +50,9 @@ class rocketICacheDM(lines: Int) extends Component {
   val indexmsb = taglsb-1;
   val indexlsb = offsetbits;
   val offsetmsb = indexlsb-1;
-  val offsetlsb = 2;
   val databits = 32;
+  val offsetlsb = ceil(log(databits/8)/log(2)).toInt;
+  val rf_cnt_bits = ceil(log(REFILL_CYCLES)/log(2)).toInt;
 
   val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: s_resolve_miss :: Nil = Enum(6) { UFix() };
   val state = Reg(resetVal = s_reset);
@@ -74,7 +75,7 @@ class rocketICacheDM(lines: Int) extends Component {
   }
 
   // refill counter
-  val refill_count = Reg(resetVal = UFix(0,2));
+  val refill_count = Reg(resetVal = UFix(0, rf_cnt_bits));
   when (io.mem.resp_val) {
     refill_count <== refill_count + UFix(1);
   }
@@ -104,7 +105,7 @@ class rocketICacheDM(lines: Int) extends Component {
   val data_addr = Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), refill_count),
                       io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix;
 
-  val data_array = Mem4(lines*4, io.mem.resp_data);
+  val data_array = Mem4(lines*REFILL_CYCLES, io.mem.resp_data);
   data_array.setReadLatency(SRAM_READ_LATENCY);
 // data_array.setTarget('inst);
   val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val);
@@ -112,14 +113,14 @@ class rocketICacheDM(lines: Int) extends Component {
   // output signals
   io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_valid && tag_match;
   io.cpu.req_rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || (tag_valid && tag_match));
-  io.cpu.resp_data :=
-    MuxLookup(r_cpu_req_idx(offsetmsb-2, offsetlsb).toUFix, data_array_rdata(127, 96),
-      Array(UFix(2) -> data_array_rdata(95,64),
-            UFix(1) -> data_array_rdata(63,32),
-            UFix(0) -> data_array_rdata(31,0)));
+
+  val word_mux = (new MuxN(REFILL_CYCLES)) { Bits(width = databits) }
+  word_mux.io.sel := r_cpu_req_idx(offsetmsb - rf_cnt_bits, offsetlsb).toUFix
+  for (i <- 0 to MEM_DATA_BITS/databits-1) { word_mux.io.in(i) := data_array_rdata((i+1)*databits-1, i*databits) }
+  io.cpu.resp_data := word_mux.io.out
 
   io.mem.req_val := (state === s_request);
-  io.mem.req_addr := Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), Bits(0,2)).toUFix;
+  io.mem.req_addr := Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), Bits(0, rf_cnt_bits)).toUFix;
 
   // control state machine
   switch (state) {
@@ -146,7 +147,7 @@ class rocketICacheDM(lines: Int) extends Component {
       }
     }
     is (s_refill) {
-      when (io.mem.resp_val && (refill_count === UFix(3,2))) {
+      when (io.mem.resp_val && (~refill_count === UFix(0))) {
         state <== s_resolve_miss;
       }
     }
diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala
index 9e1d1486..540734ec 100644
--- a/rocket/src/main/scala/icache_prefetch.scala
+++ b/rocket/src/main/scala/icache_prefetch.scala
@@ -3,17 +3,17 @@ package Top {
 import Chisel._;
 import Node._;
 import Constants._;
-import queues._;
+import scala.math._;
 
 class ioIPrefetcherMem(view: List[String] = null) extends Bundle (view)
 {
   val req_addr = UFix(PADDR_BITS, 'output);
   val req_val = Bool('output);
   val req_rdy = Bool('input);
-  val req_tag = Bits(3, 'output);
-  val resp_data = Bits(128, 'input);
+  val req_tag = Bits(IMEM_TAG_BITS, 'output);
+  val resp_data = Bits(MEM_DATA_BITS, 'input);
   val resp_val = Bool('input);
-  val resp_tag = Bits(3, 'input);
+  val resp_tag = Bits(IMEM_TAG_BITS, 'input);
 }
 
 class ioIPrefetcher extends Bundle() {
@@ -23,14 +23,14 @@ class ioIPrefetcher extends Bundle() {
 
 class rocketIPrefetcher extends Component() {
   val io = new ioIPrefetcher();
-  val pdq = (new queueSimplePF(4)) { Bits(width = 128) };
+  val pdq = (new queueSimplePF(REFILL_CYCLES)) { Bits(width = MEM_DATA_BITS) };
 
   val s_invalid :: s_valid :: s_refilling :: s_req_wait :: s_resp_wait :: s_bad_resp_wait :: Nil = Enum(6) { UFix() };
   val state = Reg(resetVal = s_invalid);
 
   val demand_miss = io.icache.req_val & io.icache.req_rdy;
-  val prefetch_addr = Reg(resetVal = UFix(0,32));
-  when (demand_miss) { prefetch_addr <== io.icache.req_addr + UFix(4); }
+  val prefetch_addr = Reg() { UFix(width = PADDR_BITS) };
+  when (demand_miss) { prefetch_addr <== io.icache.req_addr + UFix(REFILL_CYCLES); }
 
   val addr_match = (prefetch_addr === io.icache.req_addr);
   val hit = (state != s_invalid) & (state != s_req_wait) & addr_match;
@@ -40,29 +40,29 @@ class rocketIPrefetcher extends Component() {
   val ip_mem_resp_val = io.mem.resp_val && io.mem.resp_tag(0).toBool;
 
   io.mem.req_val := io.icache.req_val & ~hit | (state === s_req_wait);
-  io.mem.req_tag := Cat(Bits(0,2), !(io.icache.req_val && !hit));
+  io.mem.req_tag := !(io.icache.req_val && !hit);
   io.mem.req_addr := Mux(io.mem.req_tag(0).toBool, prefetch_addr, io.icache.req_addr);
 
   val pdq_reset = Reg(resetVal = Bool(true));
   pdq_reset <== demand_miss & ~hit | (state === s_bad_resp_wait);
 
-  val fill_cnt = Reg(resetVal = UFix(0,2));
-  when (ip_mem_resp_val.toBool) { fill_cnt <== fill_cnt + UFix(1,1); }
-  val fill_done = (fill_cnt === UFix(3,2)) & ip_mem_resp_val;
+  val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt));
+  when (ip_mem_resp_val.toBool) { fill_cnt <== fill_cnt + UFix(1); }
+  val fill_done = (~fill_cnt === UFix(0)) & ip_mem_resp_val;
 
   val forward = Reg(resetVal = Bool(false));
-  val forward_cnt = Reg(resetVal = UFix(0,2));
-  when (forward & pdq.io.deq_val) { forward_cnt <== forward_cnt + UFix(1,1); }
-  val forward_done = (forward_cnt === UFix(3,2)) & pdq.io.deq_val;
+  val forward_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt));
+  when (forward & pdq.io.deq.valid) { forward_cnt <== forward_cnt + UFix(1); }
+  val forward_done = (~forward_cnt === UFix(0)) & pdq.io.deq.valid;
   forward <== (demand_miss & hit | forward & ~forward_done);
 
-  io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag(0).toBool) || (forward && pdq.io.deq_val);
-  io.icache.resp_data := Mux(forward, pdq.io.deq_bits, io.mem.resp_data);
+  io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag(0).toBool) || (forward && pdq.io.deq.valid);
+  io.icache.resp_data := Mux(forward, pdq.io.deq.bits, io.mem.resp_data);
 
   pdq.io.q_reset := pdq_reset;
-  pdq.io.enq_bits := io.mem.resp_data;
-  pdq.io.enq_val := ip_mem_resp_val.toBool;
-  pdq.io.deq_rdy := forward;
+  pdq.io.enq.bits := io.mem.resp_data;
+  pdq.io.enq.valid := ip_mem_resp_val.toBool;
+  pdq.io.deq.ready := forward;
 
   switch (state) {
     is (s_invalid) {
diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala
index 01326b54..b81d90a5 100644
--- a/rocket/src/main/scala/queues.scala
+++ b/rocket/src/main/scala/queues.scala
@@ -1,4 +1,4 @@
-package queues
+package Top
 {
 
 import Chisel._
@@ -81,15 +81,11 @@ class queueCtrl(entries: Int) extends Component
   full <== full_next;
 }
 
-class ioQueueSimplePF[T <: Data]()(data: => T) extends Bundle()
+class ioQueueSimplePF[T <: Data]()(data: => T) extends Bundle
 {
-  val q_reset = Bool('input);
-  val enq_val = Bool('input);
-  val enq_rdy = Bool('output);
-  val deq_val = Bool('output);
-  val deq_rdy = Bool('input);
-  val enq_bits = data.asInput;
-  val deq_bits = data.asOutput;
+  val q_reset = Bool('input);
+  val enq = new ioDecoupled()(data)
+  val deq = new ioDecoupled()(data).flip
 }
 
 class queueSimplePF[T <: Data](entries: Int)(data: => T) extends Component
@@ -97,12 +93,12 @@ class queueSimplePF[T <: Data](entries: Int)(data: => T) extends Component
   override val io = new ioQueueSimplePF()(data);
   val ctrl = new queueCtrl(entries);
   ctrl.io.q_reset ^^ io.q_reset;
-  ctrl.io.deq_val ^^ io.deq_val;
-  ctrl.io.enq_rdy ^^ io.enq_rdy;
-  ctrl.io.enq_val ^^ io.enq_val;
-  ctrl.io.deq_rdy ^^ io.deq_rdy;
-  val ram = Mem(entries, ctrl.io.wen, ctrl.io.waddr, io.enq_bits);
-  ram.read(ctrl.io.raddr) ^^ io.deq_bits;
+  ctrl.io.deq_val ^^ io.deq.valid;
+  ctrl.io.enq_rdy ^^ io.enq.ready;
+  ctrl.io.enq_val ^^ io.enq.valid;
+  ctrl.io.deq_rdy ^^ io.deq.ready;
+  val ram = Mem(entries, ctrl.io.wen, ctrl.io.waddr, io.enq.bits);
+  ram.read(ctrl.io.raddr) ^^ io.deq.bits;
 }
 
 // TODO: SHOULD USE INHERITANCE BUT BREAKS INTROSPECTION CODE
diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala
index edc3173d..4b0d86ec 100644
--- a/rocket/src/main/scala/util.scala
+++ b/rocket/src/main/scala/util.scala
@@ -5,6 +5,80 @@ import Chisel._
 import Node._;
 import scala.math._;
 
+class MuxN[T <: Data](n: Int)(data: => T) extends Component {
+  val io = new Bundle {
+    val sel = Bits(width = ceil(log(n)/log(2)).toInt)
+    val in = Vec(n) { data }.asInput()
+    val out = data.asOutput()
+  }
+
+  val out = Vec(n) { Wire() { data } }
+  out(0) <== io.in(0)
+  for (i <- 1 to n-1) {
+    out(i) <== Mux(io.sel === UFix(i), io.in(i), out(i-1))
+  }
+
+  out(n-1) ^^ io.out
+}
+
+class Mux1H(n: Int, w: Int) extends Component
+{
+  val io = new Bundle {
+    val sel = Vec(n) { Bool(dir = 'input) }
+    val in = Vec(n) { Bits(width = w, dir = 'input) }
+    val out = Bits(width = w, dir = 'output)
+  }
+
+  if (n > 1) {
+    val out = Vec(n) { Wire() { Bits(width = w) } }
+    out(0) <== io.in(0) & Fill(w, io.sel(0))
+    for (i <- 1 to n-1) {
+      out(i) <== out(i-1) | (io.in(i) & Fill(w, io.sel(i)))
+    }
+
+    io.out := out(n-1)
+  } else {
+    io.out := io.in(0)
+  }
+}
+
+class ioDecoupled[T <: Data]()(data: => T) extends Bundle
+{
+  val valid = Bool('input)
+  val ready = Bool('output)
+  val bits = data.asInput
+}
+
+class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle {
+  val in = Vec(n) { (new ioDecoupled()) { data } }
+  val out = (new ioDecoupled()) { data }.flip()
+}
+
+class Arbiter[T <: Data](n: Int)(data: => T) extends Component {
+  val io = new ioArbiter(n)(data)
+  val dout = Vec(n) { Wire() { data } }
+  val vout = Wire { Bool() }
+
+  io.in(0).ready := io.out.ready
+  for (i <- 1 to n-1) {
+    io.in(i).ready := !io.in(i-1).valid && io.in(i-1).ready
+  }
+
+  dout(n-1) <== io.in(n-1).bits
+  // Descend explicitly: in Scala `n-2 to 0` is an empty Range whenever n > 2, which would leave dout(0..n-2) undriven.
+  for (i <- n-2 to 0 by -1) {
+    dout(i) <== Mux(io.in(i).valid, io.in(i).bits, dout(i+1))
+  }
+
+  for (i <- 0 to n-2) {
+    when (io.in(i).valid) { vout <== Bool(true) }
+  }
+  vout <== io.in(n-1).valid
+
+  vout ^^ io.out.valid
+  dout(0) ^^ io.out.bits
+}
+
 class ioPriorityDecoder(in_width: Int, out_width: Int) extends Bundle
 {
   val in = UFix(in_width, 'input);
@@ -49,4 +123,4 @@ class priorityEncoder(width: Int) extends Component
   io.out := l_out;
 }
 
-}
\ No newline at end of file
+}