
add FPGA test bench

The memory models now support back pressure on the response.
Andrew Waterman 2013-05-02 04:58:43 -07:00
parent d2e1828714
commit cfa86dba4f
11 changed files with 87 additions and 72 deletions
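
For context on the commit message: with back pressure, the memory model pops a response only when the consumer asserts ready in the same cycle the response is valid (the "fire" condition), rather than unconditionally whenever a response is valid. Below is a minimal, self-contained C++ sketch of that handshake; resp_channel_t is a hypothetical stand-in for illustration, and the real logic lives in mm_magic_t::tick and mm_dramsim2_t::tick in the diff.

#include <cstdint>
#include <queue>
#include <utility>
#include <vector>

// Illustrative sketch only: a response queue that honors back pressure.
// A beat is consumed only when valid && ready ("fire") in the same cycle.
struct resp_channel_t {
  std::queue<std::pair<uint64_t, std::vector<char>>> resp;  // (tag, data)

  bool resp_valid() const { return !resp.empty(); }

  // Called once per clock with the ready signal sampled from the DUT.
  void tick(bool resp_rdy) {
    bool resp_fire = resp_valid() && resp_rdy;  // handshake: both sides agree
    if (resp_fire)
      resp.pop();  // previously the models did `if (resp_valid()) resp.pop();`,
                   // so the consumer had no way to stall the response stream
  }
};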

View File

@ -60,17 +60,18 @@ int main(int argc, char** argv)
fprintf(vcdfile, "$upscope $end\n");
}
mm_t* mm = dramsim2 ? (mm_t*)(new mm_dramsim2_t) : (mm_t*)(new mm_magic_t);
mm->init(MEM_SIZE);
if (loadmem)
load_mem(mm->get_data(), loadmem);
// The chisel generated code
Top_t tile;
srand(random_seed);
tile.init(random_seed != 0);
// Instantiate and initialize main memory
mm_t* mm = dramsim2 ? (mm_t*)(new mm_dramsim2_t) : (mm_t*)(new mm_magic_t);
mm->init(MEM_SIZE, tile.Top__io_mem_resp_bits_data.width()/8, LINE_SIZE);
if (loadmem)
load_mem(mm->get_data(), loadmem);
// Instantiate HTIF
htif = new htif_emulator_t(std::vector<std::string>(argv + 1, argv + argc));
int htif_bits = tile.Top__io_host_in_bits.width();
@ -105,7 +106,9 @@ int main(int argc, char** argv)
tile.Top__io_mem_req_cmd_bits_tag.lo_word(),
tile.Top__io_mem_req_data_valid.lo_word(),
&tile.Top__io_mem_req_data_bits_data.values[0]
&tile.Top__io_mem_req_data_bits_data.values[0],
tile.Top__io_mem_resp_ready.to_bool()
);
if (tile.Top__io_host_clk_edge.to_bool())

View File

@ -5,8 +5,11 @@
#include <cstring>
#include <cassert>
void mm_t::init(size_t sz)
void mm_t::init(size_t sz, int wsz, int lsz)
{
assert(wsz > 0 && lsz > 0 && (lsz & (lsz-1)) == 0 && lsz % wsz == 0);
word_size = wsz;
line_size = lsz;
data = new char[sz];
size = sz;
}
@ -16,10 +19,10 @@ mm_t::~mm_t()
delete [] data;
}
void mm_magic_t::init(size_t sz)
void mm_magic_t::init(size_t sz, int wsz, int lsz)
{
mm_t::init(sz);
dummy_data.resize(MM_WORD_SIZE);
mm_t::init(sz, wsz, lsz);
dummy_data.resize(word_size);
}
void mm_magic_t::tick
@ -29,28 +32,30 @@ void mm_magic_t::tick
uint64_t req_cmd_addr,
uint64_t req_cmd_tag,
bool req_data_val,
void* req_data_bits
void* req_data_bits,
bool resp_rdy
)
{
bool req_cmd_fire = req_cmd_val && req_cmd_ready();
bool req_data_fire = req_data_val && req_data_ready();
bool resp_fire = resp_valid() && resp_rdy;
assert(!(req_cmd_fire && req_data_fire));
if (resp_valid())
if (resp_fire)
resp.pop();
if (req_data_fire)
{
memcpy(data + store_addr + store_count*MM_WORD_SIZE, req_data_bits, MM_WORD_SIZE);
memcpy(data + store_addr + store_count*word_size, req_data_bits, word_size);
store_count = (store_count + 1) % REFILL_COUNT;
store_count = (store_count + 1) % (line_size/word_size);
if (store_count == 0)
store_inflight = false;
}
if (req_cmd_fire)
{
auto byte_addr = req_cmd_addr*REFILL_COUNT*MM_WORD_SIZE;
auto byte_addr = req_cmd_addr * line_size;
assert(byte_addr < size);
if (req_cmd_store)
@ -58,10 +63,10 @@ void mm_magic_t::tick
store_inflight = true;
store_addr = byte_addr;
}
else for (int i = 0; i < REFILL_COUNT; i++)
else for (int i = 0; i < line_size/word_size; i++)
{
auto base = data + byte_addr + i*MM_WORD_SIZE;
auto dat = std::vector<char>(base, base + MM_WORD_SIZE);
auto base = data + byte_addr + i*word_size;
auto dat = std::vector<char>(base, base + word_size);
resp.push(std::make_pair(req_cmd_tag, dat));
}
}

View File

@ -1,17 +1,19 @@
#ifndef MM_EMULATOR_H
#define MM_EMULATOR_H
#include "mm_param.h"
#include <stdint.h>
#include <cstring>
#include <queue>
const int LINE_SIZE = 64; // all cores assume this.
const size_t MEM_SIZE = (sizeof(long) > 4 ? 4L : 1L) * 1024*1024*1024;
class mm_t
{
public:
mm_t() : data(0), size(0) {}
virtual void init(size_t sz);
virtual void init(size_t sz, int word_size, int line_size);
virtual bool req_cmd_ready() = 0;
virtual bool req_data_ready() = 0;
@ -26,17 +28,22 @@ class mm_t
uint64_t req_cmd_addr,
uint64_t req_cmd_tag,
bool req_data_val,
void* req_data_bits
void* req_data_bits,
bool resp_rdy
) = 0;
virtual void* get_data() { return data; }
virtual size_t get_size() { return size; }
virtual size_t get_word_size() { return word_size; }
virtual size_t get_line_size() { return line_size; }
virtual ~mm_t();
protected:
char* data;
size_t size;
int word_size;
int line_size;
};
class mm_magic_t : public mm_t
@ -44,7 +51,7 @@ class mm_magic_t : public mm_t
public:
mm_magic_t() : store_inflight(false), store_count(0) {}
virtual void init(size_t sz);
virtual void init(size_t sz, int word_size, int line_size);
virtual bool req_cmd_ready() { return !store_inflight; }
virtual bool req_data_ready() { return store_inflight; }
@ -59,7 +66,8 @@ class mm_magic_t : public mm_t
uint64_t req_cmd_addr,
uint64_t req_cmd_tag,
bool req_data_val,
void* req_data_bits
void* req_data_bits,
bool resp_rdy
);
protected:

View File

@ -19,10 +19,10 @@ void mm_dramsim2_t::read_complete(unsigned id, uint64_t address, uint64_t clock_
auto tag = req[address];
req.erase(address);
for (int i = 0; i < REFILL_COUNT; i++)
for (int i = 0; i < line_size/word_size; i++)
{
auto base = data + address + i*MM_WORD_SIZE;
auto dat = std::vector<char>(base, base + MM_WORD_SIZE);
auto base = data + address + i*word_size;
auto dat = std::vector<char>(base, base + word_size);
resp.push(std::make_pair(tag, dat));
}
@ -43,10 +43,12 @@ void power_callback(double a, double b, double c, double d)
//fprintf(stderr, "power callback: %0.3f, %0.3f, %0.3f, %0.3f\n",a,b,c,d);
}
void mm_dramsim2_t::init(size_t sz)
void mm_dramsim2_t::init(size_t sz, int wsz, int lsz)
{
mm_t::init(sz);
dummy_data.resize(MM_WORD_SIZE);
assert(lsz == 64); // assumed by dramsim2
mm_t::init(sz, wsz, lsz);
dummy_data.resize(word_size);
assert(size % (1024*1024) == 0);
mem = getMemorySystemInstance("DDR3_micron_64M_8B_x4_sg15.ini", "system.ini", "dramsim2_ini", "results", size/(1024*1024));
@ -67,20 +69,22 @@ void mm_dramsim2_t::tick
uint64_t req_cmd_addr,
uint64_t req_cmd_tag,
bool req_data_val,
void* req_data_bits
void* req_data_bits,
bool resp_rdy
)
{
bool req_cmd_fire = req_cmd_val && req_cmd_ready();
bool req_data_fire = req_data_val && req_data_ready();
bool resp_fire = resp_valid() && resp_rdy;
assert(!(req_cmd_fire && req_data_fire));
if (resp_valid())
if (resp_fire)
resp.pop();
if (req_cmd_fire)
{
// since the I$ can speculatively ask for addresses that are out of bounds
auto byte_addr = (req_cmd_addr*REFILL_COUNT*MM_WORD_SIZE) % size;
auto byte_addr = (req_cmd_addr * line_size) % size;
if (req_cmd_store)
{
@ -104,9 +108,9 @@ void mm_dramsim2_t::tick
if (req_data_fire)
{
memcpy(data + store_addr + store_count*MM_WORD_SIZE, req_data_bits, MM_WORD_SIZE);
memcpy(data + store_addr + store_count*word_size, req_data_bits, word_size);
store_count = (store_count + 1) % REFILL_COUNT;
store_count = (store_count + 1) % (line_size/word_size);
if (store_count == 0)
{ // last chunk of cache line arrived.
store_inflight = 0;

View File

@ -12,7 +12,7 @@ class mm_dramsim2_t : public mm_t
public:
mm_dramsim2_t() : store_inflight(false), store_count(0) {}
virtual void init(size_t sz);
virtual void init(size_t sz, int word_size, int line_size);
virtual bool req_cmd_ready() { return mem->willAcceptTransaction() && !store_inflight; }
virtual bool req_data_ready() { return mem->willAcceptTransaction() && store_inflight; }
@ -27,7 +27,8 @@ class mm_dramsim2_t : public mm_t
uint64_t req_cmd_addr,
uint64_t req_cmd_tag,
bool req_data_val,
void* req_data_bits
void* req_data_bits,
bool resp_rdy
);

View File

@ -26,23 +26,24 @@ void memory_tick(
vc_handle mem_req_data_bits,
vc_handle mem_resp_val,
vc_handle mem_resp_rdy,
vc_handle mem_resp_tag,
vc_handle mem_resp_data)
{
uint32_t req_data[MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t)];
for (size_t i = 0; i < MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t); i++)
uint32_t req_data[mm->get_word_size()/sizeof(uint32_t)];
for (size_t i = 0; i < mm->get_word_size()/sizeof(uint32_t); i++)
req_data[i] = vc_4stVectorRef(mem_req_data_bits)[i].d;
vc_putScalar(mem_req_rdy, mm->req_cmd_ready());
vc_putScalar(mem_req_data_rdy, mm->req_data_ready());
vc_putScalar(mem_resp_val, mm->resp_valid());
vec32 d[MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t)];
vec32 d[mm->get_word_size()/sizeof(uint32_t)];
d[0].c = 0;
d[0].d = mm->resp_tag();
vc_put4stVector(mem_resp_tag, d);
for (size_t i = 0; i < MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t); i++)
for (size_t i = 0; i < mm->get_word_size()/sizeof(uint32_t); i++)
{
d[i].c = 0;
d[i].d = ((uint32_t*)mm->resp_data())[i];
@ -56,22 +57,26 @@ void memory_tick(
vc_4stVectorRef(mem_req_addr)->d,
vc_4stVectorRef(mem_req_tag)->d,
vc_getScalar(mem_req_data_val),
req_data
req_data,
vc_getScalar(mem_resp_rdy)
);
}
void htif_init
(
vc_handle width,
vc_handle htif_width,
vc_handle mem_width,
vc_handle argv,
vc_handle loadmem,
vc_handle dramsim
)
{
int mw = vc_4stVectorRef(mem_width)->d;
mm = vc_getScalar(dramsim) ? (mm_t*)(new mm_dramsim2_t) : (mm_t*)(new mm_magic_t);
mm->init(MEM_SIZE);
assert(mw && (mw & (mw-1)) == 0);
mm->init(MEM_SIZE, mw/8, LINE_SIZE);
vec32* w = vc_4stVectorRef(width);
vec32* w = vc_4stVectorRef(htif_width);
assert(w->d <= 32 && w->d % 8 == 0); // htif_tick assumes data fits in a vec32
htif_bytes = w->d/8;
@ -134,7 +139,8 @@ void htif_tick
vc_put4stVector(htif_in_bits, &bits);
vc_putScalar(htif_in_valid, peek_in_valid);
vc_putScalar(exit, htif->done() ? (htif->exit_code() << 1 | 1) : 0);
bits.d = htif->done() ? (htif->exit_code() << 1 | 1) : 0;
vc_put4stVector(exit, &bits);
}
}

@ -1 +1 @@
Subproject commit ac48cb2a5d5388b83aacbe1bf6c1b00610069346
Subproject commit a5063baf1a5806f577e38c8c33d71225619da0c3

@ -1 +1 @@
Subproject commit 1f25cfbde65518f6e7b43d49451eb3ae1f9d2811
Subproject commit 1dd1e13180dd65ffe3075cbdc5c12fda8c3e755f

View File

@ -323,7 +323,7 @@ class OuterMemorySystem(htif_width: Int, clientEndpoints: Seq[ClientCoherenceAge
val incoherent = Vec(conf.ln.nClients) { Bool() }.asInput
val mem_backup = new ioMemSerialized(htif_width)
val mem_backup_en = Bool(INPUT)
val mem = new ioMemPipe
val mem = new ioMem
}
import rocket.Constants._
@ -375,6 +375,7 @@ class OuterMemorySystem(htif_width: Int, clientEndpoints: Seq[ClientCoherenceAge
mem_serdes.io.wide.req_data.bits := mem_dataq.io.deq.bits
llc.io.mem.resp.valid := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.valid, io.mem.resp.valid)
io.mem.resp.ready := Bool(true)
llc.io.mem.resp.bits := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.bits, io.mem.resp.bits)
io.mem_backup <> mem_serdes.io.narrow
@ -388,7 +389,7 @@ class Uncore(htif_width: Int, tileList: Seq[ClientCoherenceAgent])(implicit conf
val host = new HostIO(htif_width)
val mem_backup = new ioMemSerialized(htif_width)
val mem_backup_en = Bool(INPUT)
val mem = new ioMemPipe
val mem = new ioMem
val tiles = Vec(conf.ln.nClients) { new TileLinkIO }.flip
val htif = Vec(conf.ln.nClients) { new HTIFIO(conf.ln.nClients) }.flip
val incoherent = Vec(conf.ln.nClients) { Bool() }.asInput

View File

@ -13,7 +13,7 @@ class FPGAOuterMemorySystem(htif_width: Int, clientEndpoints: Seq[ClientCoherenc
val tiles = Vec(conf.ln.nClients) { new TileLinkIO }.flip
val htif = (new TileLinkIO).flip
val incoherent = Vec(conf.ln.nClients) { Bool() }.asInput
val mem = new ioMemPipe
val mem = new ioMem
}
import rocket.Constants._
@ -25,9 +25,6 @@ class FPGAOuterMemorySystem(htif_width: Int, clientEndpoints: Seq[ClientCoherenc
require(clientEndpoints.length == lnWithHtifConf.nClients)
val masterEndpoints = (0 until lnWithHtifConf.nMasters).map(new L2CoherenceAgent(_)(ucWithHtifConf))
val llc = new DRAMSideLLCNull(NGLOBAL_XACTS, REFILL_CYCLES)
val mem_serdes = new MemSerdes(htif_width)
val net = new ReferenceChipCrossbarNetwork(masterEndpoints++clientEndpoints)(lnWithHtifConf)
net.io zip (masterEndpoints.map(_.io.client) ++ io.tiles :+ io.htif) map { case (net, end) => net <> end }
masterEndpoints.map{ _.io.incoherent zip (io.incoherent ++ List(Bool(true))) map { case (m, c) => m := c } }
@ -40,19 +37,9 @@ class FPGAOuterMemorySystem(htif_width: Int, clientEndpoints: Seq[ClientCoherenc
} else {
conv.io.uncached <> masterEndpoints.head.io.master
}
llc.io.cpu.req_cmd <> Queue(conv.io.mem.req_cmd)
llc.io.cpu.req_data <> Queue(conv.io.mem.req_data, REFILL_CYCLES)
conv.io.mem.resp <> llc.io.cpu.resp
val mem_cmdq = (new Queue(2)) { new MemReqCmd }
mem_cmdq.io.enq <> llc.io.mem.req_cmd
mem_cmdq.io.deq <> io.mem.req_cmd
val mem_dataq = (new Queue(REFILL_CYCLES)) { new MemData }
mem_dataq.io.enq <> llc.io.mem.req_data
mem_dataq.io.deq <> io.mem.req_data
llc.io.mem.resp <> io.mem.resp
io.mem.req_cmd <> Queue(conv.io.mem.req_cmd)
io.mem.req_data <> Queue(conv.io.mem.req_data, REFILL_CYCLES)
conv.io.mem.resp <> Queue(io.mem.resp, 16)
}
class FPGAUncore(htif_width: Int, tileList: Seq[ClientCoherenceAgent])(implicit conf: UncoreConfiguration) extends Component
@ -61,7 +48,7 @@ class FPGAUncore(htif_width: Int, tileList: Seq[ClientCoherenceAgent])(implicit
val io = new Bundle {
val debug = new DebugIO()
val host = new HostIO(htif_width)
val mem = new ioMemPipe
val mem = new ioMem
val tiles = Vec(conf.ln.nClients) { new TileLinkIO }.flip
val htif = Vec(conf.ln.nClients) { new HTIFIO(conf.ln.nClients) }.flip
val incoherent = Vec(conf.ln.nClients) { Bool() }.asInput
@ -188,11 +175,11 @@ class Slave extends AXISlave
// read cr1 -> mem.req_cmd (nonblocking)
// the memory system is FIFO from here on out, so just remember the tags here
val tagq = new Queue(NGLOBAL_XACTS)(top.io.mem.req_cmd.bits.tag.clone)
val tagq = new Queue(4)(top.io.mem.req_cmd.bits.tag.clone)
tagq.io.enq.bits := top.io.mem.req_cmd.bits.tag
tagq.io.enq.valid := ren(1) && top.io.mem.req_cmd.valid && !top.io.mem.req_cmd.bits.rw
top.io.mem.req_cmd.ready := ren(1)
rdata(1) := Cat(top.io.mem.req_cmd.bits.addr, top.io.mem.req_cmd.bits.rw, top.io.mem.req_cmd.valid)
rdata(1) := Cat(top.io.mem.req_cmd.bits.addr, top.io.mem.req_cmd.bits.rw, top.io.mem.req_cmd.valid && (tagq.io.enq.ready || top.io.mem.req_cmd.bits.rw))
rvalid(1) := Bool(true)
require(dw >= top.io.mem.req_cmd.bits.addr.getWidth + 1 + 1)
@ -205,7 +192,7 @@ class Slave extends AXISlave
top.io.mem.resp.bits.tag := tagq.io.deq.bits
top.io.mem.resp.valid := wen(1) && in_count.andR
tagq.io.deq.ready := top.io.mem.resp.fire() && rf_count.andR
wready(1) := Bool(true) //top.io.mem.resp.ready
wready(1) := top.io.mem.resp.ready
when (wen(1) && wready(1)) {
in_count := in_count + UFix(1)
in_reg := top.io.mem.resp.bits.data
@ -222,7 +209,7 @@ class Slave extends AXISlave
when (ren(2) && rvalid(2)) { out_count := out_count + UFix(1) }
// read cr3 -> error mode (nonblocking)
rdata(3) := top.io.debug.error_mode
rdata(3) := Cat(top.io.mem.req_cmd.valid, tagq.io.enq.ready, top.io.debug.error_mode)
rvalid(3) := Bool(true)
// writes to cr2, cr3 ignored

uncore

@ -1 +1 @@
Subproject commit e39b29bac3889f43fa666bdd72d86b17d439b9ca
Subproject commit d154f3fdb673d28e26363e7d22df4ac1770f2c2c