1
0

simplify c++ memory models; support +dramsim flag

works for both vlsi and emulator
This commit is contained in:
Andrew Waterman 2012-12-04 07:04:26 -08:00
parent 5dfb388f03
commit d911e635d6
11 changed files with 412 additions and 85 deletions

View File

@ -2,10 +2,16 @@
MODEL := Top
CXX := g++
CXXFLAGS := -O2
CXXFLAGS := -O2 -g
SBT := java -Xmx2048M -Xss8M -XX:MaxPermSize=128M -jar sbt-launch.jar
DRAMSIM_OBJS := $(patsubst %.cpp,%.o,$(wildcard $(basedir)/dramsim2/*.cpp))
$(DRAMSIM_OBJS): %.o: %.cpp
$(CXX) $(CXXFLAGS) -DNO_STORAGE -DNO_OUTPUT -Dmain=nomain -c -o $@ $<
libdramsim.a: $(DRAMSIM_OBJS)
ar rcs $@ $^
#--------------------------------------------------------------------
# Tests
#--------------------------------------------------------------------

2
chisel

@ -1 +1 @@
Subproject commit 14a2dfa4be88f60965476207872a26bd1b68448c
Subproject commit 2387c2d41ba2239c8939c1a0819201db300297d5

View File

@ -6,8 +6,8 @@
#include <map>
#include "common.h"
#include "emulator.h"
//#include "mm_emulator.cc"
#include "mm_emulator_dramsim2.cc"
#include "mm.h"
#include "mm_dramsim2.h"
#include "Top.h" // chisel-generated code...
#include "disasm.h"
@ -31,6 +31,7 @@ int main(int argc, char** argv)
FILE *vcdfile = NULL, *logfile = stderr;
const char* failure = NULL;
disassembler disasm;
bool dramsim2 = false;
signal(SIGTERM, handle_sigterm);
@ -41,6 +42,8 @@ int main(int argc, char** argv)
log = true;
else if (arg == "-q")
quiet = true;
else if (arg == "+dramsim")
dramsim2 = true;
else if (arg.substr(0, 2) == "-v")
vcd = argv[i]+2;
else if (arg.substr(0, 2) == "-m")
@ -75,11 +78,10 @@ int main(int argc, char** argv)
fprintf(vcdfile, "$upscope $end\n");
}
// basic fixed latency memory model
/*uint64_t* mem = mm_init();*/
uint64_t* mm_mem = dramsim2_init();
if (loadmem != NULL)
load_mem(mm_mem, loadmem);
mm_t* mm = dramsim2 ? (mm_t*)(new mm_dramsim2_t) : (mm_t*)(new mm_magic_t);
mm->init(MEM_SIZE);
if (loadmem)
load_mem(mm->get_data(), loadmem);
// The chisel generated code
@ -101,25 +103,11 @@ int main(int argc, char** argv)
while (!exit_now)
{
// fprintf(stderr, "trace count: %ld\n", trace_count);
// memory model
// mm_tick_emulator(
dramsim2_tick_emulator (
tile.Top__io_mem_req_cmd_valid.lo_word(),
&tile.Top__io_mem_req_cmd_ready.values[0],
tile.Top__io_mem_req_cmd_bits_rw.lo_word(),
tile.Top__io_mem_req_cmd_bits_addr.lo_word(),
tile.Top__io_mem_req_cmd_bits_tag.lo_word(),
tile.Top__io_mem_req_data_valid.lo_word(),
&tile.Top__io_mem_req_data_ready.values[0],
&tile.Top__io_mem_req_data_bits_data.values[0],
&tile.Top__io_mem_resp_valid.values[0],
&tile.Top__io_mem_resp_bits_tag.values[0],
&tile.Top__io_mem_resp_bits_data.values[0]
);
// fprintf(stderr, "trace count: %ld (after dramsim2_tick_emulator)\n", trace_count);
tile.Top__io_mem_req_cmd_ready = LIT<1>(mm->req_cmd_ready());
tile.Top__io_mem_req_data_ready = LIT<1>(mm->req_data_ready());
tile.Top__io_mem_resp_valid = LIT<1>(mm->resp_valid());
tile.Top__io_mem_resp_bits_tag = LIT<64>(mm->resp_tag());
memcpy(&tile.Top__io_mem_resp_bits_data, mm->resp_data(), tile.Top__io_mem_resp_bits_data.width()/8);
tile.Top__io_host_in_valid = LIT<1>(htif_phy.in_valid());
tile.Top__io_host_in_bits = LIT<64>(htif_phy.in_bits());
@ -127,6 +115,16 @@ int main(int argc, char** argv)
tile.clock_lo(LIT<1>(0));
mm->tick(
tile.Top__io_mem_req_cmd_valid.lo_word(),
tile.Top__io_mem_req_cmd_bits_rw.lo_word(),
tile.Top__io_mem_req_cmd_bits_addr.lo_word(),
tile.Top__io_mem_req_cmd_bits_tag.lo_word(),
tile.Top__io_mem_req_data_valid.lo_word(),
&tile.Top__io_mem_req_data_bits_data.values[0]
);
if (tile.Top__io_host_clk_edge.to_bool())
{
htif_phy.tick(tile.Top__io_host_in_ready.lo_word(),

90
csrc/mm.cc Normal file
View File

@ -0,0 +1,90 @@
#include "mm.h"
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <cstring>
#include <cassert>
// Allocates the byte array that backs the simulated memory.
//   sz: capacity of the backing store, in bytes.
// The contents of the new buffer are NOT initialized here.
// Calling init() more than once is allowed: the buffer from the earlier
// call is released instead of being leaked.
void mm_t::init(size_t sz)
{
  // Allocate first so that a throwing new[] leaves the old state untouched.
  char* fresh = new char[sz];
  delete [] data;  // no-op on the first call, when data is still null
  data = fresh;
  size = sz;
}
// Releases the backing store allocated by init().
// delete[] on a null pointer is a no-op, so destroying an object whose
// init() was never called is fine (the constructor sets data to 0).
mm_t::~mm_t()
{
delete [] data;
}
// Initializes the magic memory model.
//   sz: capacity of the backing store, in bytes (forwarded to mm_t::init).
void mm_magic_t::init(size_t sz)
{
mm_t::init(sz);
// dummy_data is what resp_data() points at while no response is queued;
// size it to one memory word so it can be read like a real response.
dummy_data.resize(MM_WORD_SIZE);
}
// Advances the magic memory model by one cycle.
//   req_cmd_val / req_cmd_store / req_cmd_addr / req_cmd_tag:
//       command channel; a command is accepted when req_cmd_val is set and
//       req_cmd_ready() holds.  req_cmd_addr is a line index: it is scaled
//       by REFILL_COUNT*MM_WORD_SIZE to obtain a byte address.
//   req_data_val / req_data_bits:
//       store-data channel; one MM_WORD_SIZE-byte word is accepted when
//       req_data_val is set and req_data_ready() holds.
// A load queues REFILL_COUNT word-sized responses immediately; a store
// opens a window in which REFILL_COUNT data words are written to memory.
void mm_magic_t::tick
(
  bool req_cmd_val,
  bool req_cmd_store,
  uint64_t req_cmd_addr,
  uint64_t req_cmd_tag,
  bool req_data_val,
  void* req_data_bits
)
{
  // Sample both handshakes before any state below is modified.
  const bool cmd_accepted = req_cmd_val && req_cmd_ready();
  const bool data_accepted = req_data_val && req_data_ready();
  assert(!(cmd_accepted && data_accepted));

  // Retire the response that was at the head of the queue this cycle.
  if (resp_valid())
  {
    resp.pop();
  }

  if (data_accepted)
  {
    // Write the incoming word into its slot of the line being stored.
    char* const dst = data + store_addr + store_count*MM_WORD_SIZE;
    memcpy(dst, req_data_bits, MM_WORD_SIZE);

    store_count = (store_count + 1) % REFILL_COUNT;
    if (store_count == 0)
    {
      // Wrapped around: the whole line has arrived.
      store_inflight = false;
    }
  }

  if (cmd_accepted)
  {
    auto line_addr = req_cmd_addr*REFILL_COUNT*MM_WORD_SIZE;
    assert(line_addr < size);

    if (req_cmd_store)
    {
      // Remember where the upcoming data words belong.
      store_addr = line_addr;
      store_inflight = true;
    }
    else
    {
      // Load: queue one response per word of the line, all with the same tag.
      for (int beat = 0; beat < REFILL_COUNT; beat++)
      {
        auto first = data + line_addr + beat*MM_WORD_SIZE;
        resp.push(std::make_pair(req_cmd_tag, std::vector<char>(first, first + MM_WORD_SIZE)));
      }
    }
  }

  ++cycle;
}
// Decodes a single hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F').
// Characters that are not hex digits are not rejected; the result for them
// is unspecified.
static inline int load_mem_hex_nibble(char c)
{
  if (c >= 'a')
    return c - 'a' + 10;
  if (c >= 'A')
    return c - 'A' + 10;
  return c - '0';
}

// Loads a text file of hexadecimal lines into memory.
//   mem: destination buffer; the caller must make sure it is large enough,
//        since no bounds checking is possible here.
//   fn:  path of the file to read.
// Every line is a string of hex digits whose RIGHTMOST digit pair is the
// byte at the lowest address, so the pairs are consumed from the end of the
// line towards its start.  Consecutive lines are laid out back to back.
// Both lowercase and uppercase digits are accepted, and a trailing '\r'
// (CRLF line endings) is ignored.  On a line with an odd number of digits
// the leading digit is ignored.
// If the file cannot be opened, a message is printed to stderr and the
// process exits with status -1.
void load_mem(void* mem, const char* fn)
{
  char* m = (char*)mem;
  std::ifstream in(fn);
  if (!in)
  {
    std::cerr << "could not open " << fn << std::endl;
    exit(-1);
  }

  std::string line;
  while (std::getline(in, line))
  {
    // Drop the carriage return left behind by CRLF line endings; otherwise
    // it would be paired with a real digit and decoded as data.
    if (!line.empty() && line[line.length()-1] == '\r')
      line.erase(line.length()-1);

    const size_t nbytes = line.length()/2;
    for (size_t j = 0; j < nbytes; j++)
    {
      // Byte j comes from the j-th digit pair counted from the line's end.
      const size_t i = line.length() - 2*(j+1);
      m[j] = (char)((load_mem_hex_nibble(line[i]) << 4) | load_mem_hex_nibble(line[i+1]));
    }
    m += nbytes;
  }
}

75
csrc/mm.h Normal file
View File

@ -0,0 +1,75 @@
#ifndef MM_EMULATOR_H
#define MM_EMULATOR_H
#include "mm_param.h"
#include <stdint.h>
#include <queue>
// Abstract interface of a simulated main memory.
//
// A concrete model exposes the ready/valid signals of three channels
// (request command, request data, response) through the req_*/resp_*
// accessors and advances its state by one cycle on every tick() call.
// The base class owns the byte array that backs the memory contents.
class mm_t
{
 public:
  mm_t() : data(0), size(0) {}

  // Allocates the backing store; sz is its capacity in bytes.
  virtual void init(size_t sz);

  // True when a request command / a request data word can be accepted.
  virtual bool req_cmd_ready() = 0;
  virtual bool req_data_ready() = 0;

  // Response channel: validity flag, tag and pointer to the data.
  virtual bool resp_valid() = 0;
  virtual uint64_t resp_tag() = 0;
  virtual void* resp_data() = 0;

  // Advances the model by one cycle, given the values currently driven on
  // the request command channel (valid, store flag, address, tag) and on
  // the request data channel (valid, pointer to the data word).
  virtual void tick
  (
    bool req_cmd_val,
    bool req_cmd_store,
    uint64_t req_cmd_addr,
    uint64_t req_cmd_tag,
    bool req_data_val,
    void* req_data_bits
  ) = 0;

  // Raw access to the backing store and its capacity in bytes.
  virtual void* get_data() { return data; }
  virtual size_t get_size() { return size; }

  virtual ~mm_t();

 protected:
  char* data;   // backing store; owned by this object
  size_t size;  // capacity of data, in bytes

 private:
  // The object owns `data`, so a memberwise copy would leave two objects
  // releasing the same buffer.  Copying is therefore disabled.
  mm_t(const mm_t&) = delete;
  mm_t& operator=(const mm_t&) = delete;
};
// "Magic" memory model: a load's responses are queued in the same tick()
// that accepts the command, with no modelled DRAM timing.
//
// Commands and store data are mutually exclusive: while a store is in
// flight only data words are accepted, otherwise only commands are.
class mm_magic_t : public mm_t
{
 public:
  // Every scalar member is given a defined value.  cycle and store_addr
  // used to be left uninitialized, and tick() increments cycle.
  mm_magic_t() : store_inflight(false), store_count(0), store_addr(0), cycle(0) {}

  virtual void init(size_t sz);

  virtual bool req_cmd_ready() { return !store_inflight; }
  virtual bool req_data_ready() { return store_inflight; }
  virtual bool resp_valid() { return !resp.empty(); }

  // With no response pending, the tag reads as 0 and the data pointer
  // refers to dummy_data.
  virtual uint64_t resp_tag() { return resp_valid() ? resp.front().first : 0; }
  virtual void* resp_data() { return resp_valid() ? &resp.front().second[0] : &dummy_data[0]; }

  virtual void tick
  (
    bool req_cmd_val,
    bool req_cmd_store,
    uint64_t req_cmd_addr,
    uint64_t req_cmd_tag,
    bool req_data_val,
    void* req_data_bits
  );

 protected:
  bool store_inflight;           // a store command is waiting for its data
  int store_count;               // data words received for the current store
  uint64_t store_addr;           // byte address of the line being stored
  std::vector<char> dummy_data;  // placeholder returned by resp_data()
  uint64_t cycle;                // number of tick() calls so far

  // Pending responses as (tag, one word of data) pairs, oldest first.
  std::queue<std::pair<uint64_t, std::vector<char>>> resp;
};
void load_mem(void* mem, const char* fn);
#endif

122
csrc/mm_dramsim2.cc Normal file
View File

@ -0,0 +1,122 @@
#include "mm_dramsim2.h"
#include "mm.h"
#include <DRAMSim.h>
#include <iostream>
#include <fstream>
#include <list>
#include <queue>
#include <cstring>
#include <cstdlib>
#include <cassert>
//#define DEBUG_DRAMSIM2
using namespace DRAMSim;
// Read-completion callback registered with DRAMSim2 in init().
//   id:          passed through by the simulator; only used for debug output.
//   address:     byte address of the completed read; it must match a load
//                recorded in req by tick().
//   clock_cycle: passed through by the simulator; only used for debug output.
// Looks up the tag recorded for the load, forgets the outstanding request
// and queues one response per word of the line.
void mm_dramsim2_t::read_complete(unsigned id, uint64_t address, uint64_t clock_cycle)
{
  assert(req.count(address));
  const auto tag = req[address];
  req.erase(address);

  // Walk the line one word at a time, copying each word into a response.
  auto word = data + address;
  for (int beat = 0; beat < REFILL_COUNT; beat++, word += MM_WORD_SIZE)
  {
    resp.push(std::make_pair(tag, std::vector<char>(word, word + MM_WORD_SIZE)));
  }

#ifdef DEBUG_DRAMSIM2
  fprintf(stderr, "[Callback] read complete: id=%d , addr=0x%lx , cycle=%lu\n", id, address, clock_cycle);
#endif
}
// Write-completion callback registered with DRAMSim2 in init().
// Nothing has to happen here: tick() has already copied the store data into
// the backing array by the time the write transaction is issued.  The
// arguments are only used for the optional debug trace.
void mm_dramsim2_t::write_complete(unsigned id, uint64_t address, uint64_t clock_cycle)
{
#ifdef DEBUG_DRAMSIM2
fprintf(stderr, "[Callback] write complete: id=%d , addr=0x%lx , cycle=%lu\n", id, address, clock_cycle);
#endif
}
// Third callback handed to RegisterCallbacks() in mm_dramsim2_t::init().
// Intentionally a no-op: the four values are ignored.
// NOTE(review): presumably these are DRAMSim2 power figures - confirm
// against the DRAMSim2 documentation before relying on their meaning.
void power_callback(double a, double b, double c, double d)
{
//fprintf(stderr, "power callback: %0.3f, %0.3f, %0.3f, %0.3f\n",a,b,c,d);
}
void mm_dramsim2_t::init(size_t sz)
{
mm_t::init(sz);
dummy_data.resize(MM_WORD_SIZE);
assert(size % (1024*1024) == 0);
mem = getMemorySystemInstance("DDR3_micron_64M_8B_x4_sg15.ini", "system.ini", "dramsim2_ini", "results", size/(1024*1024));
TransactionCompleteCB *read_cb = new Callback<mm_dramsim2_t, void, unsigned, uint64_t, uint64_t>(this, &mm_dramsim2_t::read_complete);
TransactionCompleteCB *write_cb = new Callback<mm_dramsim2_t, void, unsigned, uint64_t, uint64_t>(this, &mm_dramsim2_t::write_complete);
mem->RegisterCallbacks(read_cb, write_cb, power_callback);
#ifdef DEBUG_DRAMSIM2
fprintf(stderr,"Dramsim2 init successful\n");
#endif
}
// Advances the DRAMSim2-backed memory model by one cycle.
//   req_cmd_val / req_cmd_store / req_cmd_addr / req_cmd_tag:
//       command channel; a command is accepted when req_cmd_val is set and
//       req_cmd_ready() holds.  req_cmd_addr is a line index: it is scaled
//       by REFILL_COUNT*MM_WORD_SIZE to obtain a byte address.
//   req_data_val / req_data_bits:
//       store-data channel; one MM_WORD_SIZE-byte word is accepted when
//       req_data_val is set and req_data_ready() holds.
// Loads are handed to DRAMSim2 right away and answered from
// read_complete().  A store is handed to DRAMSim2 only once all of its
// REFILL_COUNT data words have been written to the backing array.
void mm_dramsim2_t::tick
(
  bool req_cmd_val,
  bool req_cmd_store,
  uint64_t req_cmd_addr,
  uint64_t req_cmd_tag,
  bool req_data_val,
  void* req_data_bits
)
{
  // Sample both handshakes before any state below is modified.
  const bool cmd_accepted = req_cmd_val && req_cmd_ready();
  const bool data_accepted = req_data_val && req_data_ready();
  assert(!(cmd_accepted && data_accepted));

  // Retire the response that was at the head of the queue this cycle.
  if (resp_valid())
  {
    resp.pop();
  }

  if (cmd_accepted)
  {
    auto line_addr = req_cmd_addr*REFILL_COUNT*MM_WORD_SIZE;
    assert(line_addr < size);

    if (!req_cmd_store)
    {
      // Load: remember the tag under the line address so read_complete()
      // can recover it.  Only one load per address may be outstanding.
      assert(!req.count(line_addr));
      req[line_addr] = req_cmd_tag;
      mem->addTransaction(false, line_addr);
#ifdef DEBUG_DRAMSIM2
      fprintf(stderr, "Adding load transaction (addr=%lx; cyc=%ld)\n", line_addr, cycle);
#endif
    }
    else
    {
      // Store: only record the target; the transaction is issued once all
      // data words have arrived.
      store_addr = line_addr;
      store_inflight = true;
#ifdef DEBUG_DRAMSIM2
      fprintf(stderr, "Starting store transaction (addr=%lx ; tag=%ld ; cyc=%ld)\n", store_addr, req_cmd_tag, cycle);
#endif
    }
  }

  if (data_accepted)
  {
    char* const dst = data + store_addr + store_count*MM_WORD_SIZE;
    memcpy(dst, req_data_bits, MM_WORD_SIZE);

    store_count = (store_count + 1) % REFILL_COUNT;
    if (store_count == 0)
    {
      // Last chunk of the cache line arrived.
      store_inflight = false;
      mem->addTransaction(true, store_addr);
#ifdef DEBUG_DRAMSIM2
      fprintf(stderr, "Adding store transaction (addr=%lx; cyc=%ld)\n", store_addr, cycle);
#endif
    }
  }

  mem->update();
  ++cycle;
}

50
csrc/mm_dramsim2.h Normal file
View File

@ -0,0 +1,50 @@
#ifndef _MM_EMULATOR_DRAMSIM2_H
#define _MM_EMULATOR_DRAMSIM2_H
#include "mm.h"
#include <DRAMSim.h>
#include <map>
#include <queue>
#include <stdint.h>
class mm_dramsim2_t : public mm_t
{
public:
mm_dramsim2_t() : store_inflight(false), store_count(0) {}
virtual void init(size_t sz);
virtual bool req_cmd_ready() { return mem->willAcceptTransaction() && !store_inflight; }
virtual bool req_data_ready() { return mem->willAcceptTransaction() && store_inflight; }
virtual bool resp_valid() { return !resp.empty(); }
virtual uint64_t resp_tag() { return resp_valid() ? resp.front().first : 0; }
virtual void* resp_data() { return resp_valid() ? &resp.front().second[0] : &dummy_data[0]; }
virtual void tick
(
bool req_cmd_val,
bool req_cmd_store,
uint64_t req_cmd_addr,
uint64_t req_cmd_tag,
bool req_data_val,
void* req_data_bits
);
protected:
DRAMSim::MultiChannelMemorySystem *mem;
uint64_t cycle;
bool store_inflight;
int store_count;
uint64_t store_addr;
std::vector<char> dummy_data;
std::map<uint64_t,uint64_t> req;
std::queue<std::pair<uint64_t, std::vector<char>>> resp;
void read_complete(unsigned id, uint64_t address, uint64_t clock_cycle);
void write_complete(unsigned id, uint64_t address, uint64_t clock_cycle);
};
#endif

View File

@ -1,10 +1,10 @@
#include "mm_model.h"
#include "htif_phy.h"
#include "mm_types.h"
#include "mm_emulator.cc"
#include "mm.h"
#include "mm_dramsim2.h"
#include <DirectC.h>
htif_phy_t* htif_phy = NULL;
mm_t* mm = NULL;
extern "C" {
@ -23,45 +23,35 @@ void memory_tick(
vc_handle mem_resp_tag,
vc_handle mem_resp_data)
{
uint64_t req_data[MM_WORD_SIZE*refill_size/sizeof(uint64_t)];
for (size_t i = 0; i < MM_WORD_SIZE*refill_size/sizeof(uint32_t); i++)
((uint32_t*)req_data)[i] = vc_4stVectorRef(mem_req_data_bits)[i].d;
uint32_t req_data[MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t)];
for (size_t i = 0; i < MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t); i++)
req_data[i] = vc_4stVectorRef(mem_req_data_bits)[i].d;
uint64_t req_rdy, req_data_rdy, resp_val, resp_tag;
uint64_t resp_data[MM_WORD_SIZE*refill_size/sizeof(uint64_t)];
vc_putScalar(mem_req_rdy, mm->req_cmd_ready());
vc_putScalar(mem_req_data_rdy, mm->req_data_ready());
vc_putScalar(mem_resp_val, mm->resp_valid());
mm_tick_emulator(
vec32 d[MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t)];
d[0].c = 0;
d[0].d = mm->resp_tag();
vc_put4stVector(mem_resp_tag, d);
for (size_t i = 0; i < MM_WORD_SIZE*REFILL_COUNT/sizeof(uint32_t); i++)
{
d[i].c = 0;
d[i].d = ((uint32_t*)mm->resp_data())[i];
}
vc_put4stVector(mem_resp_data, d);
mm->tick
(
vc_getScalar(mem_req_val),
&req_rdy,
vc_getScalar(mem_req_store),
vc_4stVectorRef(mem_req_addr)->d,
vc_4stVectorRef(mem_req_tag)->d,
vc_getScalar(mem_req_data_val),
&req_data_rdy,
req_data,
&resp_val,
&resp_tag,
resp_data
req_data
);
vc_putScalar(mem_req_rdy, req_rdy);
vc_putScalar(mem_req_data_rdy, req_data_rdy);
vc_putScalar(mem_resp_val, resp_val);
vec32 t;
t.c = 0;
t.d = resp_tag;
vc_put4stVector(mem_resp_tag, &t);
vec32 d[MM_WORD_SIZE*refill_size/sizeof(uint32_t)];
for (size_t i = 0; i < MM_WORD_SIZE*refill_size/sizeof(uint32_t); i++)
{
d[i].c = 0;
d[i].d = ((uint32_t*)resp_data)[i];
}
vc_put4stVector(mem_resp_data, d);
}
void htif_init
@ -69,10 +59,12 @@ void htif_init
vc_handle fromhost,
vc_handle tohost,
vc_handle width,
vc_handle loadmem
vc_handle loadmem,
vc_handle dramsim
)
{
uint64_t* mem = mm_init();
mm = vc_getScalar(dramsim) ? (mm_t*)(new mm_dramsim2_t) : (mm_t*)(new mm_magic_t);
mm->init(MEM_SIZE);
vec32* fh = vc_4stVectorRef(fromhost);
vec32* th = vc_4stVectorRef(tohost);
@ -81,7 +73,7 @@ void htif_init
char loadmem_str[1024];
vc_VectorToString(loadmem, loadmem_str);
if (*loadmem_str)
load_mem(mem, loadmem_str);
load_mem(mm->get_data(), loadmem_str);
assert(w->d <= 32); // htif_tick assumes data fits in a vec32
htif_phy = new htif_phy_t(w->d, fh->d, th->d);

View File

@ -1,18 +1,18 @@
all: emulator
basedir = ..
include ../Makefrag
all: emulator
CXX := g++
CXXFLAGS := -O1
CXXFLAGS := -O1 -std=c++0x
CXXSRCS := emulator disasm
CXXFLAGS := $(CXXFLAGS) -Itestbench -I$(basedir)/chisel/csrc -I../dramsim2
CXXSRCS := emulator disasm mm mm_dramsim2
CXXFLAGS := $(CXXFLAGS) -I$(basedir)/csrc -I$(basedir)/chisel/csrc -I$(basedir)/dramsim2
OBJS := $(addsuffix .o,$(CXXSRCS) $(MODEL))
DEBUG_OBJS := $(addsuffix -debug.o,$(CXXSRCS) $(MODEL))
CHISEL_ARGS := $(MODEL) --noIoDebug --backend c --targetDir ../emulator/generated-src
CHISEL_ARGS := $(MODEL) --noIoDebug --backend c --targetDir $(basedir)/emulator/generated-src
CHISEL_ARGS_DEBUG := $(CHISEL_ARGS)-debug --debug --vcd --ioDebug
generated-src/$(MODEL).cpp: $(basedir)/riscv-rocket/src/*.scala $(basedir)/riscv-hwacha/src/*.scala $(basedir)/chisel/src/main/scala/* $(basedir)/uncore/src/*.scala
@ -27,18 +27,12 @@ $(MODEL).o: %.o: generated-src/%.cpp
$(MODEL)-debug.o: %-debug.o: generated-src-debug/%.cpp
$(CXX) $(CXXFLAGS) -Igenerated-src-debug -c -o $@ $<
$(addsuffix .o,$(CXXSRCS)): %.o: testbench/%.cc testbench/* generated-src/$(MODEL).cpp
$(addsuffix .o,$(CXXSRCS)): %.o: $(basedir)/csrc/%.cc $(basedir)/csrc/*.h generated-src/$(MODEL).cpp
$(CXX) $(CXXFLAGS) -Igenerated-src -c -o $@ $<
$(addsuffix -debug.o,$(CXXSRCS)): %-debug.o: testbench/%.cc testbench/* generated-src-debug/$(MODEL).cpp
$(addsuffix -debug.o,$(CXXSRCS)): %-debug.o: $(basedir)/csrc/%.cc $(basedir)/csrc/*.h generated-src-debug/$(MODEL).cpp
$(CXX) $(CXXFLAGS) -Igenerated-src-debug -c -o $@ $<
DRAMSIM_OBJS := $(patsubst %.cpp,%.o,$(wildcard ../dramsim2/*.cpp))
$(DRAMSIM_OBJS): %.o: %.cpp
$(CXX) $(CXXFLAGS) -DNO_STORAGE -DNO_OUTPUT -Dmain=nomain -c -o $@ $<
libdramsim.a: $(DRAMSIM_OBJS)
ar rcs $@ $^
emulator: $(OBJS) libdramsim.a
$(CXX) $(CXXFLAGS) -o $@ $(OBJS) -L. -ldramsim
@ -49,7 +43,7 @@ clean:
rm -rf *.o *.a emulator emulator-debug generated-src generated-src-debug DVEfiles output
test:
cd $(basedir)/sbt && $(SBT) "project referencechip" "~make ../emulator run-fast $(CHISEL_ARGS)"
cd $(basedir)/sbt && $(SBT) "project referencechip" "~make $(CURDIR) run-fast $(CHISEL_ARGS)"
#--------------------------------------------------------------------
# Run assembly tests and benchmarks
@ -73,13 +67,13 @@ output:
mkdir -p $@
output/%.run: output/%.hex emulator
fesvr -c -testrun -m3000000 +loadmem=$< none 2> /dev/null
fesvr +dramsim -c -testrun -m3000000 +loadmem=$< none 2> /dev/null
output/%.out: output/%.hex emulator
fesvr -c -testrun -m3000000 -l +loadmem=$< none 2> $@
fesvr +dramsim -c -testrun -m3000000 -l +loadmem=$< none 2> $@
output/%.vpd: output/%.hex emulator-debug
fesvr -c./emulator-debug -testrun -m3000000 -l -v- +loadmem=$< none 2> $(patsubst %.vpd,%.out,$@) | vcd2vpd - $@ > /dev/null && [ $$PIPESTATUS -eq 0 ]
fesvr +dramsim -c./emulator-debug -testrun -m3000000 -l -v- +loadmem=$< none 2> $(patsubst %.vpd,%.out,$@) | vcd2vpd - $@ > /dev/null && [ $$PIPESTATUS -eq 0 ]
run-asm-tests: $(addprefix output/, $(addsuffix .out, $(global_asm_tests) $(global_asm_vm_tests)))
@echo; perl -ne 'print " [$$1] $$ARGV \t$$2\n" if /\*{3}(.{8})\*{3}(.*)/' $^; echo;

@ -1 +1 @@
Subproject commit d3b86dbf35ce1ed000e95b41dd52ee24afc38351
Subproject commit b035b2385282db65c0e700bb4b5136dc3ebcda5d

2
uncore

@ -1 +1 @@
Subproject commit 0a991823dbc67a735cdc98e3063ec43deaf2068a
Subproject commit 5cfbf2bce844b3fec7dd3446dbf15960aba7cb55