From 9dabcab9c240ad179d874cafa757177cc5e92233 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 14 Oct 2015 11:33:18 -0700 Subject: [PATCH] Get rid of MemIO in Top and replace with AXI throughout --- csrc/emulator.cc | 46 +++-- csrc/mm.cc | 119 ++++++++---- csrc/mm.h | 122 ++++++++++--- csrc/mm_dramsim2.cc | 122 ++++++------- csrc/mm_dramsim2.h | 77 ++++++-- csrc/vcs_main.cc | 101 +++++++--- fsim/Makefrag | 4 +- src/main/scala/Configs.scala | 28 +-- src/main/scala/RocketChip.scala | 16 +- vsrc/rocketTestHarness.v | 315 +++++++++++++++++++++++--------- 10 files changed, 651 insertions(+), 299 deletions(-) diff --git a/csrc/emulator.cc b/csrc/emulator.cc index 64d99dfa..3eb38547 100644 --- a/csrc/emulator.cc +++ b/csrc/emulator.cc @@ -69,10 +69,12 @@ int main(int argc, char** argv) srand(random_seed); tile.init(random_seed); + uint64_t mem_width = tile.Top__io_mem_r_bits_data.width() / 8; + // Instantiate and initialize main memory mm_t* mm = dramsim2 ? (mm_t*)(new mm_dramsim2_t) : (mm_t*)(new mm_magic_t); try { - mm->init(memsz_mb*1024*1024, tile.Top__io_mem_resp_bits_data.width()/8, LINE_SIZE); + mm->init(memsz_mb*1024*1024, mem_width, LINE_SIZE); } catch (const std::bad_alloc& e) { fprintf(stderr, @@ -104,11 +106,19 @@ int main(int argc, char** argv) while (!htif->done() && trace_count < max_cycles && ret == 0) { - tile.Top__io_mem_req_cmd_ready = LIT<1>(mm->req_cmd_ready()); - tile.Top__io_mem_req_data_ready = LIT<1>(mm->req_data_ready()); - tile.Top__io_mem_resp_valid = LIT<1>(mm->resp_valid()); - tile.Top__io_mem_resp_bits_tag = LIT<64>(mm->resp_tag()); - memcpy(tile.Top__io_mem_resp_bits_data.values, mm->resp_data(), tile.Top__io_mem_resp_bits_data.width()/8); + tile.Top__io_mem_ar_ready = LIT<1>(mm->ar_ready()); + tile.Top__io_mem_aw_ready = LIT<1>(mm->aw_ready()); + tile.Top__io_mem_w_ready = LIT<1>(mm->w_ready()); + + tile.Top__io_mem_b_valid = LIT<1>(mm->b_valid()); + tile.Top__io_mem_b_bits_resp = LIT<64>(mm->b_resp()); + tile.Top__io_mem_b_bits_id = LIT<64>(mm->b_id()); + + tile.Top__io_mem_r_valid = LIT<1>(mm->r_valid()); + tile.Top__io_mem_r_bits_resp = LIT<64>(mm->r_resp()); + tile.Top__io_mem_r_bits_id = LIT<64>(mm->r_id()); + tile.Top__io_mem_r_bits_last = LIT<1>(mm->r_last()); + memcpy(tile.Top__io_mem_r_bits_data.values, mm->r_data(), mem_width); try { tile.clock_lo(LIT<1>(0)); @@ -119,15 +129,25 @@ int main(int argc, char** argv) } mm->tick( - tile.Top__io_mem_req_cmd_valid.lo_word(), - tile.Top__io_mem_req_cmd_bits_rw.lo_word(), - tile.Top__io_mem_req_cmd_bits_addr.lo_word(), - tile.Top__io_mem_req_cmd_bits_tag.lo_word(), + tile.Top__io_mem_ar_valid.lo_word(), + tile.Top__io_mem_ar_bits_addr.lo_word(), + tile.Top__io_mem_ar_bits_id.lo_word(), + tile.Top__io_mem_ar_bits_size.lo_word(), + tile.Top__io_mem_ar_bits_len.lo_word(), - tile.Top__io_mem_req_data_valid.lo_word(), - tile.Top__io_mem_req_data_bits_data.values, + tile.Top__io_mem_aw_valid.lo_word(), + tile.Top__io_mem_aw_bits_addr.lo_word(), + tile.Top__io_mem_aw_bits_id.lo_word(), + tile.Top__io_mem_aw_bits_size.lo_word(), + tile.Top__io_mem_aw_bits_len.lo_word(), - tile.Top__io_mem_resp_ready.to_bool() + tile.Top__io_mem_w_valid.lo_word(), + tile.Top__io_mem_w_bits_strb.lo_word(), + tile.Top__io_mem_w_bits_data.values, + tile.Top__io_mem_w_bits_last.lo_word(), + + tile.Top__io_mem_r_ready.to_bool(), + tile.Top__io_mem_b_ready.to_bool() ); if (tile.Top__io_host_clk_edge.to_bool()) diff --git a/csrc/mm.cc b/csrc/mm.cc index e95b1a2c..ab1117df 100644 --- a/csrc/mm.cc +++ b/csrc/mm.cc @@ -7,12 +7,38 @@ #include #include +void mm_t::write(uint64_t addr, uint8_t *data, uint64_t strb, uint64_t size) +{ + if (addr > this->size) { + fprintf(stderr, "Invalid write address %lx\n", addr); + exit(EXIT_FAILURE); + } + + uint8_t *base = this->data + addr; + for (int i = 0; i < size; i++) { + if (strb & 1) + base[i] = data[i]; + strb >>= 1; + } +} + +std::vector mm_t::read(uint64_t addr, uint64_t size) +{ + if (addr > this->size) { + fprintf(stderr, "Invalid read address %lx\n", addr); + exit(EXIT_FAILURE); + } + + uint8_t *base = this->data + addr; + return std::vector(base, base + size); +} + void mm_t::init(size_t sz, int wsz, int lsz) { assert(wsz > 0 && lsz > 0 && (lsz & (lsz-1)) == 0 && lsz % wsz == 0); word_size = wsz; line_size = lsz; - data = new char[sz]; + data = new uint8_t[sz]; size = sz; } @@ -27,52 +53,67 @@ void mm_magic_t::init(size_t sz, int wsz, int lsz) dummy_data.resize(word_size); } -void mm_magic_t::tick -( - bool req_cmd_val, - bool req_cmd_store, - uint64_t req_cmd_addr, - uint64_t req_cmd_tag, - bool req_data_val, - void* req_data_bits, - bool resp_rdy -) +void mm_magic_t::tick( + bool ar_valid, + uint64_t ar_addr, + uint64_t ar_id, + uint64_t ar_size, + uint64_t ar_len, + + bool aw_valid, + uint64_t aw_addr, + uint64_t aw_id, + uint64_t aw_size, + uint64_t aw_len, + + bool w_valid, + uint64_t w_strb, + void *w_data, + bool w_last, + + bool r_ready, + bool b_ready) { - bool req_cmd_fire = req_cmd_val && req_cmd_ready(); - bool req_data_fire = req_data_val && req_data_ready(); - bool resp_fire = resp_valid() && resp_rdy; - assert(!(req_cmd_fire && req_data_fire)); + bool ar_fire = ar_valid && ar_ready(); + bool aw_fire = aw_valid && aw_ready(); + bool w_fire = w_valid && w_ready(); + bool r_fire = r_valid() && r_ready; + bool b_fire = b_valid() && b_ready; - if (resp_fire) - resp.pop(); + if (ar_fire) { + uint64_t word_size = (1 << ar_size); + for (int i = 0; i <= ar_len; i++) { + auto dat = read(ar_addr + i * word_size, word_size); + rresp.push(mm_rresp_t(ar_id, dat, i == ar_len)); + } + } - if (req_data_fire) - { - memcpy(data + store_addr + store_count*word_size, req_data_bits, word_size); + if (aw_fire) { + store_addr = aw_addr; + store_size = (1 << aw_size); + store_id = aw_id; + store_inflight = true; + store_count = aw_len + 1; + } - store_count = (store_count + 1) % (line_size/word_size); - if (store_count == 0) + if (w_fire) { + write(store_addr, (uint8_t *) w_data, w_strb, store_size); + store_addr += store_size; + store_count--; + + if (store_count == 0) { store_inflight = false; - } - - if (req_cmd_fire) - { - auto byte_addr = req_cmd_addr * line_size; - assert(byte_addr < size); - - if (req_cmd_store) - { - store_inflight = true; - store_addr = byte_addr; - } - else for (int i = 0; i < line_size/word_size; i++) - { - auto base = data + byte_addr + i*word_size; - auto dat = std::vector(base, base + word_size); - resp.push(std::make_pair(req_cmd_tag, dat)); + bresp.push(store_id); + assert(w_last); } } + if (b_fire) + bresp.pop(); + + if (r_fire) + rresp.pop(); + cycle++; } diff --git a/csrc/mm.h b/csrc/mm.h index 30727a2a..5e647c99 100644 --- a/csrc/mm.h +++ b/csrc/mm.h @@ -10,6 +10,9 @@ const int LINE_SIZE = 64; // all cores assume this. const size_t MEM_SIZE = 1L * 1024*1024*1024; +void write_masked_data( + uint8_t *base, uint8_t *data, uint64_t strb, uint64_t size); + class mm_t { public: @@ -17,21 +20,39 @@ class mm_t virtual void init(size_t sz, int word_size, int line_size); - virtual bool req_cmd_ready() = 0; - virtual bool req_data_ready() = 0; - virtual bool resp_valid() = 0; - virtual uint64_t resp_tag() = 0; - virtual void* resp_data() = 0; + virtual bool ar_ready() = 0; + virtual bool aw_ready() = 0; + virtual bool w_ready() = 0; + virtual bool b_valid() = 0; + virtual uint64_t b_resp() = 0; + virtual uint64_t b_id() = 0; + virtual bool r_valid() = 0; + virtual uint64_t r_resp() = 0; + virtual uint64_t r_id() = 0; + virtual void *r_data() = 0; + virtual bool r_last() = 0; virtual void tick ( - bool req_cmd_val, - bool req_cmd_store, - uint64_t req_cmd_addr, - uint64_t req_cmd_tag, - bool req_data_val, - void* req_data_bits, - bool resp_rdy + bool ar_valid, + uint64_t ar_addr, + uint64_t ar_id, + uint64_t ar_size, + uint64_t ar_len, + + bool aw_valid, + uint64_t aw_addr, + uint64_t aw_id, + uint64_t aw_size, + uint64_t aw_len, + + bool w_valid, + uint64_t w_strb, + void *w_data, + bool w_last, + + bool r_ready, + bool b_ready ) = 0; virtual void* get_data() { return data; } @@ -39,47 +60,92 @@ class mm_t virtual size_t get_word_size() { return word_size; } virtual size_t get_line_size() { return line_size; } + void write(uint64_t addr, uint8_t *data, uint64_t strb, uint64_t size); + std::vector read(uint64_t addr, uint64_t size); + virtual ~mm_t(); protected: - char* data; + uint8_t* data; size_t size; int word_size; int line_size; }; +struct mm_rresp_t +{ + uint64_t id; + std::vector data; + bool last; + + mm_rresp_t(uint64_t id, std::vector data, bool last) + { + this->id = id; + this->data = data; + this->last = last; + } + + mm_rresp_t() + { + this->id = 0; + this->last = false; + } +}; + class mm_magic_t : public mm_t { public: - mm_magic_t() : store_inflight(false), store_count(0) {} + mm_magic_t() : store_inflight(false) {} virtual void init(size_t sz, int word_size, int line_size); - virtual bool req_cmd_ready() { return !store_inflight; } - virtual bool req_data_ready() { return store_inflight; } - virtual bool resp_valid() { return !resp.empty(); } - virtual uint64_t resp_tag() { return resp_valid() ? resp.front().first : 0; } - virtual void* resp_data() { return resp_valid() ? &resp.front().second[0] : &dummy_data[0]; } + virtual bool ar_ready() { return true; } + virtual bool aw_ready() { return !store_inflight; } + virtual bool w_ready() { return store_inflight; } + virtual bool b_valid() { return !bresp.empty(); } + virtual uint64_t b_resp() { return 0; } + virtual uint64_t b_id() { return b_valid() ? bresp.front() : 0; } + virtual bool r_valid() { return !rresp.empty(); } + virtual uint64_t r_resp() { return 0; } + virtual uint64_t r_id() { return r_valid() ? rresp.front().id: 0; } + virtual void *r_data() { return r_valid() ? &rresp.front().data[0] : &dummy_data[0]; } + virtual bool r_last() { return r_valid() ? rresp.front().last : false; } virtual void tick ( - bool req_cmd_val, - bool req_cmd_store, - uint64_t req_cmd_addr, - uint64_t req_cmd_tag, - bool req_data_val, - void* req_data_bits, - bool resp_rdy + bool ar_valid, + uint64_t ar_addr, + uint64_t ar_id, + uint64_t ar_size, + uint64_t ar_len, + + bool aw_valid, + uint64_t aw_addr, + uint64_t aw_id, + uint64_t aw_size, + uint64_t aw_len, + + bool w_valid, + uint64_t w_strb, + void *w_data, + bool w_last, + + bool r_ready, + bool b_ready ); protected: bool store_inflight; - int store_count; uint64_t store_addr; + uint64_t store_id; + uint64_t store_size; + uint64_t store_count; std::vector dummy_data; + std::queue bresp; + + std::queue rresp; uint64_t cycle; - std::queue>> resp; }; void load_mem(void* mem, const char* fn); diff --git a/csrc/mm_dramsim2.cc b/csrc/mm_dramsim2.cc index 8af4ffca..f7809289 100644 --- a/csrc/mm_dramsim2.cc +++ b/csrc/mm_dramsim2.cc @@ -17,27 +17,17 @@ using namespace DRAMSim; void mm_dramsim2_t::read_complete(unsigned id, uint64_t address, uint64_t clock_cycle) { - assert(req.count(address)); - auto tag = req[address]; - req.erase(address); - - for (int i = 0; i < line_size/word_size; i++) - { - auto base = data + address + i*word_size; - auto dat = std::vector(base, base + word_size); - resp.push(std::make_pair(tag, dat)); + auto req = rreq[address]; + for (int i = 0; i < req.len; i++) { + auto dat = read(address + i * req.size, req.size); + rresp.push(mm_rresp_t(req.id, dat, (i == req.len - 1))); } - -#ifdef DEBUG_DRAMSIM2 - fprintf(stderr, "[Callback] read complete: id=%d , addr=0x%lx , cycle=%lu\n", id, address, clock_cycle); -#endif } void mm_dramsim2_t::write_complete(unsigned id, uint64_t address, uint64_t clock_cycle) { -#ifdef DEBUG_DRAMSIM2 - fprintf(stderr, "[Callback] write complete: id=%d , addr=0x%lx , cycle=%lu\n", id, address, clock_cycle); -#endif + auto b_id = wreq[address]; + bresp.push(b_id); } void power_callback(double a, double b, double c, double d) @@ -64,65 +54,65 @@ void mm_dramsim2_t::init(size_t sz, int wsz, int lsz) #endif } -void mm_dramsim2_t::tick -( - bool req_cmd_val, - bool req_cmd_store, - uint64_t req_cmd_addr, - uint64_t req_cmd_tag, - bool req_data_val, - void* req_data_bits, - bool resp_rdy -) +void mm_dramsim2_t::tick( + bool ar_valid, + uint64_t ar_addr, + uint64_t ar_id, + uint64_t ar_size, + uint64_t ar_len, + + bool aw_valid, + uint64_t aw_addr, + uint64_t aw_id, + uint64_t aw_size, + uint64_t aw_len, + + bool w_valid, + uint64_t w_strb, + void *w_data, + bool w_last, + + bool r_ready, + bool b_ready) { - bool req_cmd_fire = req_cmd_val && req_cmd_ready(); - bool req_data_fire = req_data_val && req_data_ready(); - bool resp_fire = resp_valid() && resp_rdy; - assert(!(req_cmd_fire && req_data_fire)); + bool ar_fire = ar_valid && ar_ready(); + bool aw_fire = aw_valid && aw_ready(); + bool w_fire = w_valid && w_ready(); + bool r_fire = r_valid() && r_ready; + bool b_fire = b_valid() && b_ready; - if (resp_fire) - resp.pop(); - - if (req_cmd_fire) - { - // since the I$ can speculatively ask for address that are out of bounds - auto byte_addr = (req_cmd_addr * line_size) % size; - - if (req_cmd_store) - { - store_inflight = 1; - store_addr = byte_addr; -#ifdef DEBUG_DRAMSIM2 - fprintf(stderr, "Starting store transaction (addr=%lx ; tag=%ld ; cyc=%ld)\n", store_addr, req_cmd_tag, cycle); -#endif - } - else - { - assert(!req.count(byte_addr)); - req[byte_addr] = req_cmd_tag; - - mem->addTransaction(false, byte_addr); -#ifdef DEBUG_DRAMSIM2 - fprintf(stderr, "Adding load transaction (addr=%lx; cyc=%ld)\n", byte_addr, cycle); -#endif - } + if (ar_fire) { + rreq[ar_addr] = mm_req_t(ar_id, 1 << ar_size, ar_len + 1, ar_addr); + mem->addTransaction(false, ar_addr); } - if (req_data_fire) - { - memcpy(data + store_addr + store_count*word_size, req_data_bits, word_size); + if (aw_fire) { + store_addr = aw_addr; + store_size = (1 << aw_size); + store_id = aw_id; + store_count = aw_len + 1; + store_inflight = true; + } - store_count = (store_count + 1) % (line_size/word_size); - if (store_count == 0) - { // last chunch of cache line arrived. - store_inflight = 0; + if (w_fire) { + write(store_addr, (uint8_t *) w_data, w_strb, store_size); + store_addr += store_size; + store_count--; + + if (store_count == 0) { + store_inflight = false; mem->addTransaction(true, store_addr); -#ifdef DEBUG_DRAMSIM2 - fprintf(stderr, "Adding store transaction (addr=%lx; cyc=%ld)\n", store_addr, cycle); -#endif + wreq[store_addr] = store_id; + assert(w_last); } } + if (b_fire) + bresp.pop(); + + if (r_fire) + rresp.pop(); + mem->update(); cycle++; } diff --git a/csrc/mm_dramsim2.h b/csrc/mm_dramsim2.h index 398995cb..3ade8cf2 100644 --- a/csrc/mm_dramsim2.h +++ b/csrc/mm_dramsim2.h @@ -9,28 +9,69 @@ #include #include +struct mm_req_t { + uint64_t id; + uint64_t size; + uint64_t len; + uint64_t addr; + + mm_req_t(uint64_t id, uint64_t size, uint64_t len, uint64_t addr) + { + this->id = id; + this->size = size; + this->len = len; + this->addr = addr; + } + + mm_req_t() + { + this->id = 0; + this->size = 0; + this->len = 0; + this->addr = 0; + } +}; + class mm_dramsim2_t : public mm_t { public: - mm_dramsim2_t() : store_inflight(false), store_count(0) {} + mm_dramsim2_t() : store_inflight(false) {} virtual void init(size_t sz, int word_size, int line_size); - virtual bool req_cmd_ready() { return mem->willAcceptTransaction() && !store_inflight; } - virtual bool req_data_ready() { return mem->willAcceptTransaction() && store_inflight; } - virtual bool resp_valid() { return !resp.empty(); } - virtual uint64_t resp_tag() { return resp_valid() ? resp.front().first : 0; } - virtual void* resp_data() { return resp_valid() ? &resp.front().second[0] : &dummy_data[0]; } + virtual bool ar_ready() { return mem->willAcceptTransaction(); } + virtual bool aw_ready() { return mem->willAcceptTransaction() && !store_inflight; } + virtual bool w_ready() { return store_inflight; } + virtual bool b_valid() { return !bresp.empty(); } + virtual uint64_t b_resp() { return 0; } + virtual uint64_t b_id() { return b_valid() ? bresp.front() : 0; } + virtual bool r_valid() { return !rresp.empty(); } + virtual uint64_t r_resp() { return 0; } + virtual uint64_t r_id() { return r_valid() ? rresp.front().id: 0; } + virtual void *r_data() { return r_valid() ? &rresp.front().data[0] : &dummy_data[0]; } + virtual bool r_last() { return r_valid() ? rresp.front().last : false; } virtual void tick ( - bool req_cmd_val, - bool req_cmd_store, - uint64_t req_cmd_addr, - uint64_t req_cmd_tag, - bool req_data_val, - void* req_data_bits, - bool resp_rdy + bool ar_valid, + uint64_t ar_addr, + uint64_t ar_id, + uint64_t ar_size, + uint64_t ar_len, + + bool aw_valid, + uint64_t aw_addr, + uint64_t aw_id, + uint64_t aw_size, + uint64_t aw_len, + + bool w_valid, + uint64_t w_strb, + void *w_data, + bool w_last, + + bool r_ready, + bool b_ready ); @@ -39,12 +80,16 @@ class mm_dramsim2_t : public mm_t uint64_t cycle; bool store_inflight; - int store_count; uint64_t store_addr; + uint64_t store_id; + uint64_t store_size; + uint64_t store_count; std::vector dummy_data; + std::queue bresp; + std::map wreq; - std::map req; - std::queue>> resp; + std::map rreq; + std::queue rresp; void read_complete(unsigned id, uint64_t address, uint64_t clock_cycle); void write_complete(unsigned id, uint64_t address, uint64_t clock_cycle); diff --git a/csrc/vcs_main.cc b/csrc/vcs_main.cc index 6ed09362..8b0a3e31 100644 --- a/csrc/vcs_main.cc +++ b/csrc/vcs_main.cc @@ -48,50 +48,95 @@ int main(int argc, char** argv) } void memory_tick( - vc_handle mem_req_val, - vc_handle mem_req_rdy, - vc_handle mem_req_store, - vc_handle mem_req_addr, - vc_handle mem_req_tag, + vc_handle ar_valid, + vc_handle ar_ready, + vc_handle ar_addr, + vc_handle ar_id, + vc_handle ar_size, + vc_handle ar_len, - vc_handle mem_req_data_val, - vc_handle mem_req_data_rdy, - vc_handle mem_req_data_bits, + vc_handle aw_valid, + vc_handle aw_ready, + vc_handle aw_addr, + vc_handle aw_id, + vc_handle aw_size, + vc_handle aw_len, - vc_handle mem_resp_val, - vc_handle mem_resp_rdy, - vc_handle mem_resp_tag, - vc_handle mem_resp_data) + vc_handle w_valid, + vc_handle w_ready, + vc_handle w_strb, + vc_handle w_data, + vc_handle w_last, + + vc_handle r_valid, + vc_handle r_ready, + vc_handle r_resp, + vc_handle r_id, + vc_handle r_data, + vc_handle r_last, + + vc_handle b_valid, + vc_handle b_ready, + vc_handle b_resp, + vc_handle b_id) { - uint32_t req_data[mm->get_word_size()/sizeof(uint32_t)]; + uint32_t write_data[mm->get_word_size()/sizeof(uint32_t)]; for (size_t i = 0; i < mm->get_word_size()/sizeof(uint32_t); i++) - req_data[i] = vc_4stVectorRef(mem_req_data_bits)[i].d; + write_data[i] = vc_4stVectorRef(w_data)[i].d; - vc_putScalar(mem_req_rdy, mm->req_cmd_ready()); - vc_putScalar(mem_req_data_rdy, mm->req_data_ready()); - vc_putScalar(mem_resp_val, mm->resp_valid()); + vc_putScalar(ar_ready, mm->ar_ready()); + vc_putScalar(aw_ready, mm->aw_ready()); + vc_putScalar(w_ready, mm->w_ready()); + vc_putScalar(b_valid, mm->b_valid()); + vc_putScalar(r_valid, mm->r_valid()); + vc_putScalar(r_last, mm->r_last()); vec32 d[mm->get_word_size()/sizeof(uint32_t)]; + d[0].c = 0; - d[0].d = mm->resp_tag(); - vc_put4stVector(mem_resp_tag, d); + d[0].d = mm->b_resp(); + vc_put4stVector(b_resp, d); + + d[0].c = 0; + d[0].d = mm->b_id(); + vc_put4stVector(b_id, d); + + d[0].c = 0; + d[0].d = mm->r_resp(); + vc_put4stVector(r_resp, d); + + d[0].c = 0; + d[0].d = mm->r_id(); + vc_put4stVector(r_id, d); for (size_t i = 0; i < mm->get_word_size()/sizeof(uint32_t); i++) { d[i].c = 0; - d[i].d = ((uint32_t*)mm->resp_data())[i]; + d[i].d = ((uint32_t*)mm->r_data())[i]; } - vc_put4stVector(mem_resp_data, d); + vc_put4stVector(r_data, d); mm->tick ( - vc_getScalar(mem_req_val), - vc_getScalar(mem_req_store), - vc_4stVectorRef(mem_req_addr)->d, - vc_4stVectorRef(mem_req_tag)->d, - vc_getScalar(mem_req_data_val), - req_data, - vc_getScalar(mem_resp_rdy) + vc_getScalar(ar_valid), + vc_4stVectorRef(ar_addr)->d, + vc_4stVectorRef(ar_id)->d, + vc_4stVectorRef(ar_size)->d, + vc_4stVectorRef(ar_len)->d, + + vc_getScalar(aw_valid), + vc_4stVectorRef(aw_addr)->d, + vc_4stVectorRef(aw_id)->d, + vc_4stVectorRef(aw_size)->d, + vc_4stVectorRef(aw_len)->d, + + vc_getScalar(w_valid), + vc_4stVectorRef(w_strb)->d, + write_data, + vc_getScalar(w_last), + + vc_getScalar(r_ready), + vc_getScalar(b_ready) ); } diff --git a/fsim/Makefrag b/fsim/Makefrag index a08fd03c..6a9dab4e 100644 --- a/fsim/Makefrag +++ b/fsim/Makefrag @@ -52,13 +52,13 @@ VCS_OPTS = -notice -line +lint=all,noVCDE,noONGS,noUI -error=PCWM-L -timescale=1 # Build the simulator #-------------------------------------------------------------------- -simv = $(sim_dir)/simv-$(CONFIG) +simv = $(sim_dir)/simv-$(MODEL)-$(CONFIG) $(simv) : $(sim_vsrcs) $(sim_csrcs) $(sim_dir)/libdramsim.a cd $(sim_dir) && \ $(VCS) $(VCS_OPTS) -o $(simv) \ -debug_pp \ -simv_debug = $(sim_dir)/simv-$(CONFIG)-debug +simv_debug = $(sim_dir)/simv-$(MODEL)-$(CONFIG)-debug $(simv_debug) : $(sim_vsrcs) $(sim_csrcs) $(sim_dir)/libdramsim.a cd $(sim_dir) && \ $(VCS) $(VCS_OPTS) -o $(simv_debug) \ diff --git a/src/main/scala/Configs.scala b/src/main/scala/Configs.scala index f5290046..9295560f 100644 --- a/src/main/scala/Configs.scala +++ b/src/main/scala/Configs.scala @@ -40,20 +40,22 @@ class DefaultConfig extends Config ( case PPNBits => site(PAddrBits) - site(PgIdxBits) case VAddrBits => site(VPNBits) + site(PgIdxBits) case ASIdBits => 7 - case MIFTagBits => Dump("MEM_TAG_BITS", - // Bits needed at the L2 agent - log2Up(site(NAcquireTransactors)+2) + - // Bits added by NASTI interconnect - log2Up(site(NMemoryChannels) * site(NBanksPerMemoryChannel) + 1) + - // Bits added by final arbiter (not needed if true multichannel memory) - log2Up(site(NMemoryChannels))) - case MIFDataBits => Dump("MEM_DATA_BITS", 128) - case MIFAddrBits => Dump("MEM_ADDR_BITS", site(PAddrBits) - site(CacheBlockOffsetBits)) + case MIFTagBits => // Bits needed at the L2 agent + log2Up(site(NAcquireTransactors)+2) + + // Bits added by NASTI interconnect + log2Up(site(NMemoryChannels) * site(NBanksPerMemoryChannel) + 1) + + // Bits added by final arbiter (not needed if true multichannel memory) + log2Up(site(NMemoryChannels)) + case MIFDataBits => 64 + case MIFAddrBits => site(PAddrBits) - site(CacheBlockOffsetBits) case MIFDataBeats => site(CacheBlockBytes) * 8 / site(MIFDataBits) - case NastiKey => NastiParameters( - dataBits = site(MIFDataBits), - addrBits = site(PAddrBits), - idBits = site(MIFTagBits)) + case NastiKey => { + Dump("MEM_STRB_BITS", site(MIFDataBits) / 8) + NastiParameters( + dataBits = Dump("MEM_DATA_BITS", site(MIFDataBits)), + addrBits = Dump("MEM_ADDR_BITS", site(PAddrBits)), + idBits = Dump("MEM_ID_BITS", site(MIFTagBits))) + } //Params used by all caches case NSets => findBy(CacheName) case NWays => findBy(CacheName) diff --git a/src/main/scala/RocketChip.scala b/src/main/scala/RocketChip.scala index 68ea23a3..7717f8da 100644 --- a/src/main/scala/RocketChip.scala +++ b/src/main/scala/RocketChip.scala @@ -68,7 +68,7 @@ class BasicTopIO(implicit val p: Parameters) extends ParameterizedBundle()(p) } class TopIO(implicit p: Parameters) extends BasicTopIO()(p) { - val mem = new MemIO + val mem = new NastiIO } class MultiChannelTopIO(implicit p: Parameters) extends BasicTopIO()(p) { @@ -96,15 +96,19 @@ class Top(topParams: Parameters) extends Module with HasTopLevelParameters { val temp = Module(new MultiChannelTop) val arb = Module(new NastiArbiter(nMemChannels)) - val conv = Module(new MemIONastiIOConverter(p(CacheBlockOffsetBits))) arb.io.master <> temp.io.mem - conv.io.nasti <> arb.io.slave - io.mem.req_cmd <> Queue(conv.io.mem.req_cmd) - io.mem.req_data <> Queue(conv.io.mem.req_data, mifDataBeats) - conv.io.mem.resp <> Queue(io.mem.resp, mifDataBeats) + io.mem.ar <> Queue(arb.io.slave.ar) + io.mem.aw <> Queue(arb.io.slave.aw) + io.mem.w <> Queue(arb.io.slave.w) + arb.io.slave.r <> Queue(io.mem.r) + arb.io.slave.b <> Queue(io.mem.b) io.mem_backup_ctrl <> temp.io.mem_backup_ctrl io.host <> temp.io.host + // Memory cache type should be normal non-cacheable bufferable + io.mem.ar.bits.cache := UInt("b0011") + io.mem.aw.bits.cache := UInt("b0011") + // tie off the mmio port val errslave = Module(new NastiErrorSlave) errslave.io <> temp.io.mmio diff --git a/vsrc/rocketTestHarness.v b/vsrc/rocketTestHarness.v index d4625eb6..ea3d9d55 100644 --- a/vsrc/rocketTestHarness.v +++ b/vsrc/rocketTestHarness.v @@ -23,22 +23,39 @@ extern "A" void htif_tick extern "A" void memory_tick ( - input reg mem_req_valid, - output reg mem_req_ready, - input reg mem_req_store, - input reg [`MEM_ADDR_BITS-1:0] mem_req_bits_addr, - input reg [`MEM_TAG_BITS-1:0] mem_req_bits_tag, + input reg ar_valid, + output reg ar_ready, + input reg [`MEM_ADDR_BITS-1:0] ar_addr, + input reg [`MEM_ID_BITS-1:0] ar_id, + input reg [2:0] ar_size, + input reg [7:0] ar_len, - input reg mem_req_data_valid, - output reg mem_req_data_ready, - input reg [`MEM_DATA_BITS-1:0] mem_req_data_bits, - - output reg mem_resp_valid, - input reg mem_resp_ready, - output reg [`MEM_TAG_BITS-1:0] mem_resp_bits_tag, - output reg [`MEM_DATA_BITS-1:0] mem_resp_bits_data + input reg aw_valid, + output reg aw_ready, + input reg [`MEM_ADDR_BITS-1:0] aw_addr, + input reg [`MEM_ID_BITS-1:0] aw_id, + input reg [2:0] aw_size, + input reg [7:0] aw_len, + + input reg w_valid, + output reg w_ready, + input reg [`MEM_STRB_BITS-1:0] w_strb, + input reg [`MEM_DATA_BITS-1:0] w_data, + input reg w_last, + + output reg r_valid, + input reg r_ready, + output reg [1:0] r_resp, + output reg [`MEM_ID_BITS-1:0] r_id, + output reg [`MEM_DATA_BITS-1:0] r_data, + output reg r_last, + + output reg b_valid, + input reg b_ready, + output reg [1:0] b_resp, + output reg [`MEM_ID_BITS-1:0] b_id ); - + module rocketTestHarness; reg [31:0] seed; @@ -47,33 +64,51 @@ module rocketTestHarness; //----------------------------------------------- // Instantiate the processor - reg clk = 0; - reg reset = 1; + reg clk = 1'b0; + reg reset = 1'b1; reg r_reset; - reg start = 0; + reg start = 1'b0; always #`CLOCK_PERIOD clk = ~clk; - wire mem_req_valid; - reg mem_req_ready; - wire mem_req_bits_rw; - wire [`MEM_ADDR_BITS-1:0] mem_req_bits_addr; - wire [`MEM_TAG_BITS-1:0] mem_req_bits_tag; + wire ar_valid; + reg ar_ready; + wire [`MEM_ADDR_BITS-1:0] ar_addr; + wire [`MEM_ID_BITS-1:0] ar_id; + wire [2:0] ar_size; + wire [7:0] ar_len; - wire mem_req_data_valid; - reg mem_req_data_ready; - wire [`MEM_DATA_BITS-1:0] mem_req_data_bits; + wire aw_valid; + reg aw_ready; + wire [`MEM_ADDR_BITS-1:0] aw_addr; + wire [`MEM_ID_BITS-1:0] aw_id; + wire [2:0] aw_size; + wire [7:0] aw_len; - reg mem_resp_valid; - wire mem_resp_ready; - reg [`MEM_TAG_BITS-1:0] mem_resp_bits_tag; - reg [`MEM_DATA_BITS-1:0] mem_resp_bits_data; + wire w_valid; + reg w_ready; + wire [`MEM_STRB_BITS-1:0] w_strb; + wire [`MEM_DATA_BITS-1:0] w_data; + wire w_last; + + reg r_valid; + wire r_ready; + reg [1:0] r_resp; + reg [`MEM_ID_BITS-1:0] r_id; + reg [`MEM_DATA_BITS-1:0] r_data; + reg r_last; + + reg b_valid; + wire b_ready; + reg [1:0] b_resp; + reg [`MEM_ID_BITS-1:0] b_id; reg htif_out_ready; wire htif_in_valid; wire [`HTIF_WIDTH-1:0] htif_in_bits; wire htif_in_ready, htif_out_valid; wire [`HTIF_WIDTH-1:0] htif_out_bits; + wire htif_out_stats; wire mem_bk_in_valid; wire mem_bk_out_valid; @@ -91,25 +126,47 @@ module rocketTestHarness; wire htif_out_stats_delay; assign #0.1 htif_out_stats = htif_out_stats_delay; - wire mem_req_valid_delay; assign #0.1 mem_req_valid = mem_req_valid_delay; - wire #0.1 mem_req_ready_delay = mem_req_ready; - wire [`MEM_TAG_BITS-1:0] mem_req_bits_tag_delay; assign #0.1 mem_req_bits_tag = mem_req_bits_tag_delay; - wire [`MEM_ADDR_BITS-1:0] mem_req_bits_addr_delay; assign #0.1 mem_req_bits_addr = mem_req_bits_addr_delay; - wire mem_req_bits_rw_delay; assign #0.1 mem_req_bits_rw = mem_req_bits_rw_delay; + wire ar_valid_delay; assign #0.1 ar_valid = ar_valid_delay; + wire #0.1 ar_ready_delay = ar_ready; + wire [`MEM_ADDR_BITS-1:0] ar_addr_delay; assign #0.1 ar_addr = ar_addr_delay; + wire [`MEM_ID_BITS-1:0] ar_id_delay; assign #0.1 ar_id = ar_id_delay; + wire [2:0] ar_size_delay; assign #0.1 ar_size = ar_size_delay; + wire [7:0] ar_len_delay; assign #0.1 ar_len = ar_len_delay; - wire mem_req_data_valid_delay; assign #0.1 mem_req_data_valid = mem_req_data_valid_delay; - wire #0.1 mem_req_data_ready_delay = mem_req_data_ready; - wire [`MEM_DATA_BITS-1:0] mem_req_data_bits_delay; assign #0.1 mem_req_data_bits = mem_req_data_bits_delay; + wire aw_valid_delay; assign #0.1 aw_valid = aw_valid_delay; + wire #0.1 aw_ready_delay = aw_ready; + wire [`MEM_ADDR_BITS-1:0] aw_addr_delay; assign #0.1 aw_addr = aw_addr_delay; + wire [`MEM_ID_BITS-1:0] aw_id_delay; assign #0.1 aw_id = aw_id_delay; + wire [2:0] aw_size_delay; assign #0.1 aw_size = aw_size_delay; + wire [7:0] aw_len_delay; assign #0.1 aw_len = aw_len_delay; - wire #0.1 mem_resp_valid_delay = mem_resp_valid; - wire mem_resp_ready_delay; assign #0.1 mem_resp_ready = mem_resp_ready_delay; - wire [`MEM_TAG_BITS-1:0] #0.1 mem_resp_bits_tag_delay = mem_resp_bits_tag; - wire [`MEM_DATA_BITS-1:0] #0.1 mem_resp_bits_data_delay = mem_resp_bits_data; + wire w_valid_delay; assign #0.1 w_valid = w_valid_delay; + wire #0.1 w_ready_delay = w_ready; + wire [`MEM_STRB_BITS-1:0] w_strb_delay; assign #0.1 w_strb = w_strb_delay; + wire [`MEM_DATA_BITS-1:0] w_data_delay; assign #0.1 w_data = w_data_delay; + wire w_last_delay; assign #0.1 w_last = w_last_delay; + + wire #0.1 r_valid_delay = r_valid; + wire r_ready_delay; assign #0.1 r_ready = r_ready_delay; + wire [1:0] #0.1 r_resp_delay = r_resp; + wire [`MEM_ID_BITS-1:0] #0.1 r_id_delay = r_id; + wire [`MEM_DATA_BITS-1:0] #0.1 r_data_delay = r_data; + wire #0.1 r_last_delay = r_last; + + wire #0.1 b_valid_delay = b_valid; + wire b_ready_delay; assign #0.1 b_ready = b_ready_delay; + wire [1:0] #0.1 b_resp_delay = b_resp; + wire [`MEM_ID_BITS-1:0] #0.1 b_id_delay = b_id; wire #0.1 mem_bk_out_ready_delay = mem_bk_out_ready; wire #0.1 mem_bk_in_valid_delay = mem_bk_in_valid; wire mem_bk_out_valid_delay; assign #0.1 mem_bk_out_valid = mem_bk_out_valid_delay; +`ifdef FPGA + assign mem_bk_out_valid_delay = 1'b0; + assign htif_out_stats_delay = 1'b0; +`endif + Top dut ( .clk(clk), @@ -131,26 +188,69 @@ module rocketTestHarness; .io_mem_backup_ctrl_en(1'b1), `else .io_mem_backup_ctrl_en(1'b0), -`endif +`endif // MEM_BACKUP_EN .io_mem_backup_ctrl_in_valid(mem_bk_in_valid_delay), .io_mem_backup_ctrl_out_ready(mem_bk_out_ready_delay), .io_mem_backup_ctrl_out_valid(mem_bk_out_valid_delay), -`endif +`else + .io_host_clk (), + .io_host_clk_edge (), + .io_host_debug_stats_csr (), - .io_mem_req_cmd_valid(mem_req_valid_delay), - .io_mem_req_cmd_ready(mem_req_ready_delay), - .io_mem_req_cmd_bits_rw(mem_req_bits_rw_delay), - .io_mem_req_cmd_bits_addr(mem_req_bits_addr_delay), - .io_mem_req_cmd_bits_tag(mem_req_bits_tag_delay), + .io_mem_backup_ctrl_en (1'b0), + .io_mem_backup_ctrl_in_valid (1'b0), + .io_mem_backup_ctrl_out_ready (1'b0), + .io_mem_backup_ctrl_out_valid (), +`endif // FPGA - .io_mem_req_data_valid(mem_req_data_valid_delay), - .io_mem_req_data_ready(mem_req_data_ready_delay), - .io_mem_req_data_bits_data(mem_req_data_bits_delay), + .io_mem_ar_valid (ar_valid_delay), + .io_mem_ar_ready (ar_ready_delay), + .io_mem_ar_bits_addr (ar_addr_delay), + .io_mem_ar_bits_id (ar_id_delay), + .io_mem_ar_bits_size (ar_size_delay), + .io_mem_ar_bits_len (ar_len_delay), + .io_mem_ar_bits_burst (), + .io_mem_ar_bits_lock (), + .io_mem_ar_bits_cache (), + .io_mem_ar_bits_prot (), + .io_mem_ar_bits_qos (), + .io_mem_ar_bits_region (), + .io_mem_ar_bits_user (), - .io_mem_resp_valid(mem_resp_valid_delay), - .io_mem_resp_ready(mem_resp_ready_delay), - .io_mem_resp_bits_tag(mem_resp_bits_tag_delay), - .io_mem_resp_bits_data(mem_resp_bits_data_delay) + .io_mem_aw_valid (aw_valid_delay), + .io_mem_aw_ready (aw_ready_delay), + .io_mem_aw_bits_addr (aw_addr_delay), + .io_mem_aw_bits_id (aw_id_delay), + .io_mem_aw_bits_size (aw_size_delay), + .io_mem_aw_bits_len (aw_len_delay), + .io_mem_aw_bits_burst (), + .io_mem_aw_bits_lock (), + .io_mem_aw_bits_cache (), + .io_mem_aw_bits_prot (), + .io_mem_aw_bits_qos (), + .io_mem_aw_bits_region (), + .io_mem_aw_bits_user (), + + .io_mem_w_valid (w_valid_delay), + .io_mem_w_ready (w_ready_delay), + .io_mem_w_bits_strb (w_strb_delay), + .io_mem_w_bits_data (w_data_delay), + .io_mem_w_bits_last (w_last_delay), + .io_mem_w_bits_user (), + + .io_mem_r_valid (r_valid_delay), + .io_mem_r_ready (r_ready_delay), + .io_mem_r_bits_resp (r_resp_delay), + .io_mem_r_bits_id (r_id_delay), + .io_mem_r_bits_data (r_data_delay), + .io_mem_r_bits_last (r_last_delay), + .io_mem_r_bits_user (1'b0), + + .io_mem_b_valid (b_valid_delay), + .io_mem_b_ready (b_ready_delay), + .io_mem_b_bits_resp (b_resp_delay), + .io_mem_b_bits_id (b_id_delay), + .io_mem_b_bits_user (1'b0) ); `ifdef FPGA @@ -165,40 +265,63 @@ module rocketTestHarness; r_reset <= reset; if (reset || r_reset) begin - mem_req_ready <= 0; - mem_req_data_ready <= 0; - mem_resp_valid <= 0; - mem_resp_bits_tag <= 0; - mem_resp_bits_data <= 0; + ar_ready <= 1'b0; + aw_ready <= 1'b0; + w_ready <= 1'b0; + r_valid <= 1'b0; + r_resp <= 2'b0; + r_id <= {`MEM_ID_BITS {1'b0}}; + r_data <= {`MEM_DATA_BITS {1'b0}}; + r_last <= 1'b0; + b_valid <= 1'b0; + b_resp <= 2'b0; + b_id <= {`MEM_ID_BITS {1'b0}}; end else begin memory_tick ( - mem_req_valid, - mem_req_ready, - mem_req_bits_rw, - mem_req_bits_addr, - mem_req_bits_tag, + ar_valid, + ar_ready, + ar_addr, + ar_id, + ar_size, + ar_len, - mem_req_data_valid, - mem_req_data_ready, - mem_req_data_bits, - - mem_resp_valid, - mem_resp_ready, - mem_resp_bits_tag, - mem_resp_bits_data + aw_valid, + aw_ready, + aw_addr, + aw_id, + aw_size, + aw_len, + + w_valid, + w_ready, + w_strb, + w_data, + w_last, + + r_valid, + r_ready, + r_resp, + r_id, + r_data, + r_last, + + b_valid, + b_ready, + b_resp, + b_id ); end end wire mem_bk_req_valid, mem_bk_req_rw, mem_bk_req_data_valid; - wire [`MEM_TAG_BITS-1:0] mem_bk_req_tag; + wire [`MEM_ID_BITS-1:0] mem_bk_req_tag; wire [`MEM_ADDR_BITS-1:0] mem_bk_req_addr; wire [`MEM_DATA_BITS-1:0] mem_bk_req_data_bits; wire mem_bk_req_ready, mem_bk_req_data_ready, mem_bk_resp_valid; - wire [`MEM_TAG_BITS-1:0] mem_bk_resp_tag; + wire [`MEM_ID_BITS-1:0] mem_bk_resp_tag; wire [`MEM_DATA_BITS-1:0] mem_bk_resp_data; `ifdef MEM_BACKUP_EN @@ -251,15 +374,16 @@ module rocketTestHarness; ); `else // set dessert outputs to zero when !backupmem_en - assign mem_bk_out_ready = 0; - assign mem_bk_in_valid = 0; - assign mem_in_bits = 0; - assign mem_bk_req_valid = 0; - assign mem_bk_req_addr = 0; - assign mem_bk_req_rw = 0; - assign mem_bk_req_tag = 0; - assign mem_bk_req_data_valid = 0; - assign mem_bk_req_data_bits = 0; + assign mem_bk_out_ready = 1'b0; + assign mem_bk_in_valid = 1'b0; + assign mem_in_bits = {`HTIF_WIDTH {1'b0}}; + assign mem_bk_req_valid = 1'b0; + assign mem_bk_req_ready = 1'b0; + assign mem_bk_req_addr = {`MEM_ADDR_BITS {1'b0}}; + assign mem_bk_req_rw = 1'b0; + assign mem_bk_req_tag = {`MEM_ID_BITS {1'b0}}; + assign mem_bk_req_data_valid = 1'b0; + assign mem_bk_req_data_bits = 16'd0; `endif reg htif_in_valid_premux; @@ -392,13 +516,13 @@ module rocketTestHarness; begin $fdisplay(stderr, "*** FAILED *** (%s) after %d simulation cycles", reason, trace_count); `VCDPLUSCLOSE - htif_fini(1); + htif_fini(1'b1); end if (exit == 1) begin `VCDPLUSCLOSE - htif_fini(0); + htif_fini(1'b0); end end @@ -430,9 +554,24 @@ module rocketTestHarness; always @(posedge clk) begin - if (verbose && mem_req_valid && mem_req_ready) + if (verbose) begin - $fdisplay(stderr, "MC: rw=%d addr=%x", mem_req_bits_rw, {mem_req_bits_addr,6'd0}); + if (ar_valid && ar_ready) + begin + $fdisplay(stderr, "MC: ar addr=%x", ar_addr); + end + if (aw_valid && aw_ready) + begin + $fdisplay(stderr, "MC: aw addr=%x", aw_addr); + end + if (w_valid && w_ready) + begin + $fdisplay(stderr, "MC: w data=%x", w_data); + end + if (r_valid && r_ready) + begin + $fdisplay(stderr, "MC: r data=%x", r_data); + end end end