From cfa86dba4f9b71eb4dadbfd674ed94bd476d7f93 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 2 May 2013 04:58:43 -0700 Subject: [PATCH] add FPGA test bench The memory models now support back pressure on the response. --- csrc/emulator.cc | 15 +++++++++------ csrc/mm.cc | 29 +++++++++++++++++------------ csrc/mm.h | 18 +++++++++++++----- csrc/mm_dramsim2.cc | 26 +++++++++++++++----------- csrc/mm_dramsim2.h | 5 +++-- csrc/vcs_main.cc | 24 +++++++++++++++--------- riscv-rocket | 2 +- riscv-tests | 2 +- src/main/scala/RocketChip.scala | 5 +++-- src/main/scala/fpga.scala | 31 +++++++++---------------------- uncore | 2 +- 11 files changed, 87 insertions(+), 72 deletions(-) diff --git a/csrc/emulator.cc b/csrc/emulator.cc index 35bfaccb..693120ec 100644 --- a/csrc/emulator.cc +++ b/csrc/emulator.cc @@ -60,17 +60,18 @@ int main(int argc, char** argv) fprintf(vcdfile, "$upscope $end\n"); } - mm_t* mm = dramsim2 ? (mm_t*)(new mm_dramsim2_t) : (mm_t*)(new mm_magic_t); - mm->init(MEM_SIZE); - if (loadmem) - load_mem(mm->get_data(), loadmem); - // The chisel generated code Top_t tile; srand(random_seed); tile.init(random_seed != 0); + // Instantiate and initialize main memory + mm_t* mm = dramsim2 ? (mm_t*)(new mm_dramsim2_t) : (mm_t*)(new mm_magic_t); + mm->init(MEM_SIZE, tile.Top__io_mem_resp_bits_data.width()/8, LINE_SIZE); + if (loadmem) + load_mem(mm->get_data(), loadmem); + // Instantiate HTIF htif = new htif_emulator_t(std::vector(argv + 1, argv + argc)); int htif_bits = tile.Top__io_host_in_bits.width(); @@ -105,7 +106,9 @@ int main(int argc, char** argv) tile.Top__io_mem_req_cmd_bits_tag.lo_word(), tile.Top__io_mem_req_data_valid.lo_word(), - &tile.Top__io_mem_req_data_bits_data.values[0] + &tile.Top__io_mem_req_data_bits_data.values[0], + + tile.Top__io_mem_resp_ready.to_bool() ); if (tile.Top__io_host_clk_edge.to_bool()) diff --git a/csrc/mm.cc b/csrc/mm.cc index afbbfef7..c5399cfd 100644 --- a/csrc/mm.cc +++ b/csrc/mm.cc @@ -5,8 +5,11 @@ #include #include -void mm_t::init(size_t sz) +void mm_t::init(size_t sz, int wsz, int lsz) { + assert(wsz > 0 && lsz > 0 && (lsz & (lsz-1)) == 0 && lsz % wsz == 0); + word_size = wsz; + line_size = lsz; data = new char[sz]; size = sz; } @@ -16,10 +19,10 @@ mm_t::~mm_t() delete [] data; } -void mm_magic_t::init(size_t sz) +void mm_magic_t::init(size_t sz, int wsz, int lsz) { - mm_t::init(sz); - dummy_data.resize(MM_WORD_SIZE); + mm_t::init(sz, wsz, lsz); + dummy_data.resize(word_size); } void mm_magic_t::tick @@ -29,28 +32,30 @@ void mm_magic_t::tick uint64_t req_cmd_addr, uint64_t req_cmd_tag, bool req_data_val, - void* req_data_bits + void* req_data_bits, + bool resp_rdy ) { bool req_cmd_fire = req_cmd_val && req_cmd_ready(); bool req_data_fire = req_data_val && req_data_ready(); + bool resp_fire = resp_valid() && resp_rdy; assert(!(req_cmd_fire && req_data_fire)); - if (resp_valid()) + if (resp_fire) resp.pop(); if (req_data_fire) { - memcpy(data + store_addr + store_count*MM_WORD_SIZE, req_data_bits, MM_WORD_SIZE); + memcpy(data + store_addr + store_count*word_size, req_data_bits, word_size); - store_count = (store_count + 1) % REFILL_COUNT; + store_count = (store_count + 1) % (line_size/word_size); if (store_count == 0) store_inflight = false; } if (req_cmd_fire) { - auto byte_addr = req_cmd_addr*REFILL_COUNT*MM_WORD_SIZE; + auto byte_addr = req_cmd_addr * line_size; assert(byte_addr < size); if (req_cmd_store) @@ -58,10 +63,10 @@ void mm_magic_t::tick store_inflight = true; store_addr = byte_addr; } - else for (int i = 0; i < REFILL_COUNT; i++) + else for (int i = 0; i < line_size/word_size; i++) { - auto base = data + byte_addr + i*MM_WORD_SIZE; - auto dat = std::vector(base, base + MM_WORD_SIZE); + auto base = data + byte_addr + i*word_size; + auto dat = std::vector(base, base + word_size); resp.push(std::make_pair(req_cmd_tag, dat)); } } diff --git a/csrc/mm.h b/csrc/mm.h index 64465405..13ccf202 100644 --- a/csrc/mm.h +++ b/csrc/mm.h @@ -1,17 +1,19 @@ #ifndef MM_EMULATOR_H #define MM_EMULATOR_H -#include "mm_param.h" #include #include #include +const int LINE_SIZE = 64; // all cores assume this. +const size_t MEM_SIZE = (sizeof(long) > 4 ? 4L : 1L) * 1024*1024*1024; + class mm_t { public: mm_t() : data(0), size(0) {} - virtual void init(size_t sz); + virtual void init(size_t sz, int word_size, int line_size); virtual bool req_cmd_ready() = 0; virtual bool req_data_ready() = 0; @@ -26,17 +28,22 @@ class mm_t uint64_t req_cmd_addr, uint64_t req_cmd_tag, bool req_data_val, - void* req_data_bits + void* req_data_bits, + bool resp_rdy ) = 0; virtual void* get_data() { return data; } virtual size_t get_size() { return size; } + virtual size_t get_word_size() { return word_size; } + virtual size_t get_line_size() { return line_size; } virtual ~mm_t(); protected: char* data; size_t size; + int word_size; + int line_size; }; class mm_magic_t : public mm_t @@ -44,7 +51,7 @@ class mm_magic_t : public mm_t public: mm_magic_t() : store_inflight(false), store_count(0) {} - virtual void init(size_t sz); + virtual void init(size_t sz, int word_size, int line_size); virtual bool req_cmd_ready() { return !store_inflight; } virtual bool req_data_ready() { return store_inflight; } @@ -59,7 +66,8 @@ class mm_magic_t : public mm_t uint64_t req_cmd_addr, uint64_t req_cmd_tag, bool req_data_val, - void* req_data_bits + void* req_data_bits, + bool resp_rdy ); protected: diff --git a/csrc/mm_dramsim2.cc b/csrc/mm_dramsim2.cc index 6be9eaf6..904a24b3 100644 --- a/csrc/mm_dramsim2.cc +++ b/csrc/mm_dramsim2.cc @@ -19,10 +19,10 @@ void mm_dramsim2_t::read_complete(unsigned id, uint64_t address, uint64_t clock_ auto tag = req[address]; req.erase(address); - for (int i = 0; i < REFILL_COUNT; i++) + for (int i = 0; i < line_size/word_size; i++) { - auto base = data + address + i*MM_WORD_SIZE; - auto dat = std::vector(base, base + MM_WORD_SIZE); + auto base = data + address + i*word_size; + auto dat = std::vector(base, base + word_size); resp.push(std::make_pair(tag, dat)); } @@ -43,10 +43,12 @@ void power_callback(double a, double b, double c, double d) //fprintf(stderr, "power callback: %0.3f, %0.3f, %0.3f, %0.3f\n",a,b,c,d); } -void mm_dramsim2_t::init(size_t sz) +void mm_dramsim2_t::init(size_t sz, int wsz, int lsz) { - mm_t::init(sz); - dummy_data.resize(MM_WORD_SIZE); + assert(lsz == 64); // assumed by dramsim2 + mm_t::init(sz, wsz, lsz); + + dummy_data.resize(word_size); assert(size % (1024*1024) == 0); mem = getMemorySystemInstance("DDR3_micron_64M_8B_x4_sg15.ini", "system.ini", "dramsim2_ini", "results", size/(1024*1024)); @@ -67,20 +69,22 @@ void mm_dramsim2_t::tick uint64_t req_cmd_addr, uint64_t req_cmd_tag, bool req_data_val, - void* req_data_bits + void* req_data_bits, + bool resp_rdy ) { bool req_cmd_fire = req_cmd_val && req_cmd_ready(); bool req_data_fire = req_data_val && req_data_ready(); + bool resp_fire = resp_valid() && resp_rdy; assert(!(req_cmd_fire && req_data_fire)); - if (resp_valid()) + if (resp_fire) resp.pop(); if (req_cmd_fire) { // since the I$ can speculatively ask for address that are out of bounds - auto byte_addr = (req_cmd_addr*REFILL_COUNT*MM_WORD_SIZE) % size; + auto byte_addr = (req_cmd_addr * line_size) % size; if (req_cmd_store) { @@ -104,9 +108,9 @@ void mm_dramsim2_t::tick if (req_data_fire) { - memcpy(data + store_addr + store_count*MM_WORD_SIZE, req_data_bits, MM_WORD_SIZE); + memcpy(data + store_addr + store_count*word_size, req_data_bits, word_size); - store_count = (store_count + 1) % REFILL_COUNT; + store_count = (store_count + 1) % (line_size/word_size); if (store_count == 0) { // last chunch of cache line arrived. store_inflight = 0; diff --git a/csrc/mm_dramsim2.h b/csrc/mm_dramsim2.h index 7db22480..1546afe8 100644 --- a/csrc/mm_dramsim2.h +++ b/csrc/mm_dramsim2.h @@ -12,7 +12,7 @@ class mm_dramsim2_t : public mm_t public: mm_dramsim2_t() : store_inflight(false), store_count(0) {} - virtual void init(size_t sz); + virtual void init(size_t sz, int word_size, int line_size); virtual bool req_cmd_ready() { return mem->willAcceptTransaction() && !store_inflight; } virtual bool req_data_ready() { return mem->willAcceptTransaction() && store_inflight; } @@ -27,7 +27,8 @@ class mm_dramsim2_t : public mm_t uint64_t req_cmd_addr, uint64_t req_cmd_tag, bool req_data_val, - void* req_data_bits + void* req_data_bits, + bool resp_rdy ); diff --git a/csrc/vcs_main.cc b/csrc/vcs_main.cc index 862621cb..e5f5162b 100644 --- a/csrc/vcs_main.cc +++ b/csrc/vcs_main.cc @@ -26,23 +26,24 @@ void memory_tick( vc_handle mem_req_data_bits, vc_handle mem_resp_val, + vc_handle mem_resp_rdy, vc_handle mem_resp_tag, vc_handle mem_resp_data) { - uint32_t req_data[MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t)]; - for (size_t i = 0; i < MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t); i++) + uint32_t req_data[mm->get_word_size()/sizeof(uint32_t)]; + for (size_t i = 0; i < mm->get_word_size()/sizeof(uint32_t); i++) req_data[i] = vc_4stVectorRef(mem_req_data_bits)[i].d; vc_putScalar(mem_req_rdy, mm->req_cmd_ready()); vc_putScalar(mem_req_data_rdy, mm->req_data_ready()); vc_putScalar(mem_resp_val, mm->resp_valid()); - vec32 d[MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t)]; + vec32 d[mm->get_word_size()/sizeof(uint32_t)]; d[0].c = 0; d[0].d = mm->resp_tag(); vc_put4stVector(mem_resp_tag, d); - for (size_t i = 0; i < MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t); i++) + for (size_t i = 0; i < mm->get_word_size()/sizeof(uint32_t); i++) { d[i].c = 0; d[i].d = ((uint32_t*)mm->resp_data())[i]; @@ -56,22 +57,26 @@ void memory_tick( vc_4stVectorRef(mem_req_addr)->d, vc_4stVectorRef(mem_req_tag)->d, vc_getScalar(mem_req_data_val), - req_data + req_data, + vc_getScalar(mem_resp_rdy) ); } void htif_init ( - vc_handle width, + vc_handle htif_width, + vc_handle mem_width, vc_handle argv, vc_handle loadmem, vc_handle dramsim ) { + int mw = vc_4stVectorRef(mem_width)->d; mm = vc_getScalar(dramsim) ? (mm_t*)(new mm_dramsim2_t) : (mm_t*)(new mm_magic_t); - mm->init(MEM_SIZE); + assert(mw && (mw & (mw-1)) == 0); + mm->init(MEM_SIZE, mw/8, LINE_SIZE); - vec32* w = vc_4stVectorRef(width); + vec32* w = vc_4stVectorRef(htif_width); assert(w->d <= 32 && w->d % 8 == 0); // htif_tick assumes data fits in a vec32 htif_bytes = w->d/8; @@ -134,7 +139,8 @@ void htif_tick vc_put4stVector(htif_in_bits, &bits); vc_putScalar(htif_in_valid, peek_in_valid); - vc_putScalar(exit, htif->done() ? (htif->exit_code() << 1 | 1) : 0); + bits.d = htif->done() ? (htif->exit_code() << 1 | 1) : 0; + vc_put4stVector(exit, &bits); } } diff --git a/riscv-rocket b/riscv-rocket index ac48cb2a..a5063baf 160000 --- a/riscv-rocket +++ b/riscv-rocket @@ -1 +1 @@ -Subproject commit ac48cb2a5d5388b83aacbe1bf6c1b00610069346 +Subproject commit a5063baf1a5806f577e38c8c33d71225619da0c3 diff --git a/riscv-tests b/riscv-tests index 1f25cfbd..1dd1e131 160000 --- a/riscv-tests +++ b/riscv-tests @@ -1 +1 @@ -Subproject commit 1f25cfbde65518f6e7b43d49451eb3ae1f9d2811 +Subproject commit 1dd1e13180dd65ffe3075cbdc5c12fda8c3e755f diff --git a/src/main/scala/RocketChip.scala b/src/main/scala/RocketChip.scala index d5d3f802..526907e2 100644 --- a/src/main/scala/RocketChip.scala +++ b/src/main/scala/RocketChip.scala @@ -323,7 +323,7 @@ class OuterMemorySystem(htif_width: Int, clientEndpoints: Seq[ClientCoherenceAge val incoherent = Vec(conf.ln.nClients) { Bool() }.asInput val mem_backup = new ioMemSerialized(htif_width) val mem_backup_en = Bool(INPUT) - val mem = new ioMemPipe + val mem = new ioMem } import rocket.Constants._ @@ -375,6 +375,7 @@ class OuterMemorySystem(htif_width: Int, clientEndpoints: Seq[ClientCoherenceAge mem_serdes.io.wide.req_data.bits := mem_dataq.io.deq.bits llc.io.mem.resp.valid := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.valid, io.mem.resp.valid) + io.mem.resp.ready := Bool(true) llc.io.mem.resp.bits := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.bits, io.mem.resp.bits) io.mem_backup <> mem_serdes.io.narrow @@ -388,7 +389,7 @@ class Uncore(htif_width: Int, tileList: Seq[ClientCoherenceAgent])(implicit conf val host = new HostIO(htif_width) val mem_backup = new ioMemSerialized(htif_width) val mem_backup_en = Bool(INPUT) - val mem = new ioMemPipe + val mem = new ioMem val tiles = Vec(conf.ln.nClients) { new TileLinkIO }.flip val htif = Vec(conf.ln.nClients) { new HTIFIO(conf.ln.nClients) }.flip val incoherent = Vec(conf.ln.nClients) { Bool() }.asInput diff --git a/src/main/scala/fpga.scala b/src/main/scala/fpga.scala index 3788864f..f906e3e2 100644 --- a/src/main/scala/fpga.scala +++ b/src/main/scala/fpga.scala @@ -13,7 +13,7 @@ class FPGAOuterMemorySystem(htif_width: Int, clientEndpoints: Seq[ClientCoherenc val tiles = Vec(conf.ln.nClients) { new TileLinkIO }.flip val htif = (new TileLinkIO).flip val incoherent = Vec(conf.ln.nClients) { Bool() }.asInput - val mem = new ioMemPipe + val mem = new ioMem } import rocket.Constants._ @@ -25,9 +25,6 @@ class FPGAOuterMemorySystem(htif_width: Int, clientEndpoints: Seq[ClientCoherenc require(clientEndpoints.length == lnWithHtifConf.nClients) val masterEndpoints = (0 until lnWithHtifConf.nMasters).map(new L2CoherenceAgent(_)(ucWithHtifConf)) - val llc = new DRAMSideLLCNull(NGLOBAL_XACTS, REFILL_CYCLES) - val mem_serdes = new MemSerdes(htif_width) - val net = new ReferenceChipCrossbarNetwork(masterEndpoints++clientEndpoints)(lnWithHtifConf) net.io zip (masterEndpoints.map(_.io.client) ++ io.tiles :+ io.htif) map { case (net, end) => net <> end } masterEndpoints.map{ _.io.incoherent zip (io.incoherent ++ List(Bool(true))) map { case (m, c) => m := c } } @@ -40,19 +37,9 @@ class FPGAOuterMemorySystem(htif_width: Int, clientEndpoints: Seq[ClientCoherenc } else { conv.io.uncached <> masterEndpoints.head.io.master } - llc.io.cpu.req_cmd <> Queue(conv.io.mem.req_cmd) - llc.io.cpu.req_data <> Queue(conv.io.mem.req_data, REFILL_CYCLES) - conv.io.mem.resp <> llc.io.cpu.resp - - val mem_cmdq = (new Queue(2)) { new MemReqCmd } - mem_cmdq.io.enq <> llc.io.mem.req_cmd - mem_cmdq.io.deq <> io.mem.req_cmd - - val mem_dataq = (new Queue(REFILL_CYCLES)) { new MemData } - mem_dataq.io.enq <> llc.io.mem.req_data - mem_dataq.io.deq <> io.mem.req_data - - llc.io.mem.resp <> io.mem.resp + io.mem.req_cmd <> Queue(conv.io.mem.req_cmd) + io.mem.req_data <> Queue(conv.io.mem.req_data, REFILL_CYCLES) + conv.io.mem.resp <> Queue(io.mem.resp, 16) } class FPGAUncore(htif_width: Int, tileList: Seq[ClientCoherenceAgent])(implicit conf: UncoreConfiguration) extends Component @@ -61,7 +48,7 @@ class FPGAUncore(htif_width: Int, tileList: Seq[ClientCoherenceAgent])(implicit val io = new Bundle { val debug = new DebugIO() val host = new HostIO(htif_width) - val mem = new ioMemPipe + val mem = new ioMem val tiles = Vec(conf.ln.nClients) { new TileLinkIO }.flip val htif = Vec(conf.ln.nClients) { new HTIFIO(conf.ln.nClients) }.flip val incoherent = Vec(conf.ln.nClients) { Bool() }.asInput @@ -188,11 +175,11 @@ class Slave extends AXISlave // read cr1 -> mem.req_cmd (nonblocking) // the memory system is FIFO from hereon out, so just remember the tags here - val tagq = new Queue(NGLOBAL_XACTS)(top.io.mem.req_cmd.bits.tag.clone) + val tagq = new Queue(4)(top.io.mem.req_cmd.bits.tag.clone) tagq.io.enq.bits := top.io.mem.req_cmd.bits.tag tagq.io.enq.valid := ren(1) && top.io.mem.req_cmd.valid && !top.io.mem.req_cmd.bits.rw top.io.mem.req_cmd.ready := ren(1) - rdata(1) := Cat(top.io.mem.req_cmd.bits.addr, top.io.mem.req_cmd.bits.rw, top.io.mem.req_cmd.valid) + rdata(1) := Cat(top.io.mem.req_cmd.bits.addr, top.io.mem.req_cmd.bits.rw, top.io.mem.req_cmd.valid && (tagq.io.enq.ready || top.io.mem.req_cmd.bits.rw)) rvalid(1) := Bool(true) require(dw >= top.io.mem.req_cmd.bits.addr.getWidth + 1 + 1) @@ -205,7 +192,7 @@ class Slave extends AXISlave top.io.mem.resp.bits.tag := tagq.io.deq.bits top.io.mem.resp.valid := wen(1) && in_count.andR tagq.io.deq.ready := top.io.mem.resp.fire() && rf_count.andR - wready(1) := Bool(true) //top.io.mem.resp.ready + wready(1) := top.io.mem.resp.ready when (wen(1) && wready(1)) { in_count := in_count + UFix(1) in_reg := top.io.mem.resp.bits.data @@ -222,7 +209,7 @@ class Slave extends AXISlave when (ren(2) && rvalid(2)) { out_count := out_count + UFix(1) } // read cr3 -> error mode (nonblocking) - rdata(3) := top.io.debug.error_mode + rdata(3) := Cat(top.io.mem.req_cmd.valid, tagq.io.enq.ready, top.io.debug.error_mode) rvalid(3) := Bool(true) // writes to cr2, cr3 ignored diff --git a/uncore b/uncore index e39b29ba..d154f3fd 160000 --- a/uncore +++ b/uncore @@ -1 +1 @@ -Subproject commit e39b29bac3889f43fa666bdd72d86b17d439b9ca +Subproject commit d154f3fdb673d28e26363e7d22df4ac1770f2c2c