diff --git a/.gitignore b/.gitignore
index 353d553c..ffd79035 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 target/
 project/target
+*.swp
diff --git a/Makefrag b/Makefrag
index 0ef0653c..497b04b2 100644
--- a/Makefrag
+++ b/Makefrag
@@ -109,6 +109,7 @@ asm_p_tests = \
 	rv64uf-p-fcmp \
 	rv64uf-p-fcvt \
 	rv64uf-p-fcvt_w \
+	rv64uf-p-fclass \
 	rv64uf-p-fadd \
 	rv64uf-p-fmin \
 	rv64uf-p-fmadd \
@@ -205,6 +206,7 @@ asm_v_tests = \
 	rv64uf-v-fcmp \
 	rv64uf-v-fcvt \
 	rv64uf-v-fcvt_w \
+	rv64uf-v-fclass \
 	rv64uf-v-fadd \
 	rv64uf-v-fmin \
 	rv64uf-v-fmadd \
diff --git a/chisel b/chisel
index 25a33ba1..663b8716 160000
--- a/chisel
+++ b/chisel
@@ -1 +1 @@
-Subproject commit 25a33ba1d456294fe4ebc79fe95339a0d9d20e8a
+Subproject commit 663b8716aa157a6b82f7f4e4f7cbfeb59c9bc3b5
diff --git a/csrc/emulator.cc b/csrc/emulator.cc
index 6b90d469..f7d73aaa 100644
--- a/csrc/emulator.cc
+++ b/csrc/emulator.cc
@@ -139,7 +139,7 @@ int main(int argc, char** argv)
   if (htif->exit_code())
   {
-    fprintf(stderr, "*** FAILED *** (code = %d) after %lld cycles\n", htif->exit_code(), (long long)trace_count);
+    fprintf(stderr, "*** FAILED *** (code = %d, seed %d) after %lld cycles\n", htif->exit_code(), random_seed, (long long)trace_count);
     ret = htif->exit_code();
   }
   else if (trace_count == max_cycles)
diff --git a/csrc/vcs_main.cc b/csrc/vcs_main.cc
index a406a826..cc9e4a06 100644
--- a/csrc/vcs_main.cc
+++ b/csrc/vcs_main.cc
@@ -8,12 +8,41 @@
 #include
 #include
-static htif_emulator_t* htif = NULL;
-static unsigned htif_bytes;
-static mm_t* mm = NULL;
-
 extern "C" {
+extern int vcs_main(int argc, char** argv);
+
+static htif_emulator_t* htif;
+static unsigned htif_bytes;
+static mm_t* mm;
+static const char* loadmem;
+
+void htif_fini(vc_handle failure)
+{
+  delete htif;
+  htif = NULL;
+  exit(vc_getScalar(failure));
+}
+
+int main(int argc, char** argv)
+{
+  bool dramsim = false;
+
+  for (int i = 1; i < argc; i++)
+  {
+    if (!strcmp(argv[i], "+dramsim"))
+      dramsim = true;
+    else if (!strncmp(argv[i], "+loadmem=", 9))
+      loadmem = argv[i]+9;
+  }
+
+  mm = dramsim ? (mm_t*)(new mm_dramsim2_t) : (mm_t*)(new mm_magic_t);
+  htif = new htif_emulator_t(std::vector<std::string>(argv + 1, argv + argc));
+
+  vcs_main(argc, argv);
+  abort(); // should never get here
+}
+
 void memory_tick(
   vc_handle mem_req_val,
   vc_handle mem_req_rdy,
@@ -62,55 +91,18 @@ void memory_tick(
   );
 }
 
-void htif_init
-(
-  vc_handle htif_width,
-  vc_handle mem_width,
-  vc_handle argv,
-  vc_handle loadmem,
-  vc_handle dramsim
-)
+void htif_init(vc_handle htif_width, vc_handle mem_width)
 {
   int mw = vc_4stVectorRef(mem_width)->d;
-  mm = vc_getScalar(dramsim) ? (mm_t*)(new mm_dramsim2_t) : (mm_t*)(new mm_magic_t);
   assert(mw && (mw & (mw-1)) == 0);
   mm->init(MEM_SIZE, mw/8, LINE_SIZE);
 
+  if (loadmem)
+    load_mem(mm->get_data(), loadmem);
+
   vec32* w = vc_4stVectorRef(htif_width);
   assert(w->d <= 32 && w->d % 8 == 0); // htif_tick assumes data fits in a vec32
   htif_bytes = w->d/8;
-
-  char loadmem_str[1024];
-  vc_VectorToString(loadmem, loadmem_str);
-  if (*loadmem_str)
-    load_mem(mm->get_data(), loadmem_str);
-
-  char argv_str[1024];
-  vc_VectorToString(argv, argv_str);
-  if (!*argv_str)
-  {
-    if (*loadmem_str)
-      strcpy(argv_str, "none");
-    else
-    {
-      fprintf(stderr, "Usage: ./simv [host options] +argv=\" [target args]\"\n");
-      exit(-1);
-    }
-  }
-
-  std::vector<std::string> args;
-  std::stringstream ss(argv_str);
-  std::istream_iterator<std::string> begin(ss), end;
-  std::copy(begin, end, std::back_inserter<std::vector<std::string> >(args));
-
-  htif = new htif_emulator_t(args);
-}
-
-void htif_fini(vc_handle failure)
-{
-  delete htif;
-  htif = NULL;
-  exit(vc_getScalar(failure));
 }
 
 void htif_tick
diff --git a/emulator/Makefile b/emulator/Makefile
index fba9d352..e72db12b 100644
--- a/emulator/Makefile
+++ b/emulator/Makefile
@@ -15,7 +15,7 @@ LDFLAGS := $(LDFLAGS) -L$(RISCV)/lib -Wl,-rpath,$(RISCV)/lib -L. -ldramsim -lfes
 OBJS := $(addsuffix .o,$(CXXSRCS) $(MODEL))
 DEBUG_OBJS := $(addsuffix -debug.o,$(CXXSRCS) $(MODEL))
 
-CHISEL_ARGS := $(MODEL) --noIoDebug --backend c --targetDir emulator/generated-src
+CHISEL_ARGS := $(MODEL) --noIoDebug --backend Chisel.Fame1CppBackend --targetDir emulator/generated-src
 CHISEL_ARGS_DEBUG := $(CHISEL_ARGS)-debug --debug --vcd --ioDebug
 
 generated-src/$(MODEL).h: $(base_dir)/rocket/$(src_path)/*.scala $(base_dir)/hwacha/$(src_path)/*.scala $(base_dir)/uncore/$(src_path)/*.scala $(base_dir)/$(src_path)/*.scala
diff --git a/hardfloat b/hardfloat
index 39a08130..2a05ecbb 160000
--- a/hardfloat
+++ b/hardfloat
@@ -1 +1 @@
-Subproject commit 39a08130d41ceb9e7f98fa7092fc38970009a460
+Subproject commit 2a05ecbb351304464cfedd02890dafb80bfad6d7
diff --git a/project/build.scala b/project/build.scala
index a6f98650..877c66a4 100644
--- a/project/build.scala
+++ b/project/build.scala
@@ -31,7 +31,8 @@ object BuildSettings extends Build {
   lazy val uncore = Project("uncore", file("uncore"), settings = buildSettings) dependsOn(hardfloat)
   lazy val rocket = Project("rocket", file("rocket"), settings = buildSettings) dependsOn(uncore)
   lazy val hwacha = Project("hwacha", file("hwacha"), settings = buildSettings) dependsOn(uncore, rocket)
-  lazy val referencechip = Project("referencechip", file("."), settings = buildSettings ++ chipSettings) dependsOn(rocket, hwacha)
+  lazy val rekall = Project("rekall", file("rekall"), settings = buildSettings) dependsOn(chisel)
+  lazy val referencechip = Project("referencechip", file("."), settings = buildSettings ++ chipSettings) dependsOn(rocket, hwacha, rekall)
 
   val elaborateTask = InputKey[Unit]("elaborate", "convert chisel components into backend source code")
   val makeTask = InputKey[Unit]("make", "trigger backend-specific makefile command")
diff --git a/riscv-tools b/riscv-tools
index ebb909ab..bc6bbf50 160000
--- a/riscv-tools
+++ b/riscv-tools
@@ -1 +1 @@
-Subproject commit ebb909ab9dfff8387449faa5827d47eda693b70b
+Subproject commit bc6bbf5024bc5297a928b8620ad0364e44d26cfe
diff --git a/rocket b/rocket
index 49f633cd..47e883ed 160000
--- a/rocket
+++ b/rocket
@@ -1 +1 @@
-Subproject commit 49f633cd12de6e69479943d8089563edae7e03f5
+Subproject commit 47e883edc125488b3354729af2669ec8e4123a8b
diff --git a/src/main/scala/RocketChip.scala b/src/main/scala/RocketChip.scala
index 9355a8ed..5c23b074 100644
--- a/src/main/scala/RocketChip.scala
+++ b/src/main/scala/RocketChip.scala
@@ -5,11 +5,11 @@ import uncore._
 import rocket._
 import rocket.Util._
 import ReferenceChipBackend._
-import scala.collection.mutable.ArrayBuffer
 import scala.collection.mutable.HashMap
+import DRAMModel._
 
 object DummyTopLevelConstants {
-  val NTILES = 1
+  val NTILES = 2
   val NBANKS = 1
   val HTIF_WIDTH = 16
   val ENABLE_SHARING = true
@@ -18,6 +18,10 @@ object DummyTopLevelConstants {
   val NL2_REL_XACTS = 1
   val NL2_ACQ_XACTS = 7
   val NMSHRS = 2
+  val MEM_TAG_BITS = 5
+  val MEM_DATA_BITS = 128
+  val MEM_ADDR_BITS = PADDR_BITS - OFFSET_BITS
+  val MEM_DATA_BEATS = 4
 }
 
 import DummyTopLevelConstants._
@@ -83,22 +87,25 @@ class ReferenceChipBackend extends VerilogBackend
   transforms += ((c: Module) => collectNodesIntoComp(initializeDFS))
 }
 
+class Fame1ReferenceChipBackend extends ReferenceChipBackend with Fame1Transform
+
 class OuterMemorySystem(htif_width: Int)(implicit conf: UncoreConfiguration) extends Module
 {
-  implicit val (tl, ln, l2) = (conf.tl, conf.tl.ln, conf.l2)
+  implicit val (tl, ln, l2, mif) = (conf.tl, conf.tl.ln, conf.l2, conf.mif)
   val io = new Bundle {
     val tiles = Vec.fill(conf.nTiles){new TileLinkIO}.flip
     val htif = (new TileLinkIO).flip
     val incoherent = Vec.fill(ln.nClients){Bool()}.asInput
-    val mem = new ioMem
-    val mem_backup = new ioMemSerialized(htif_width)
+    val mem = new MemIO
+    val mem_backup = new MemSerializedIO(htif_width)
     val mem_backup_en = Bool(INPUT)
   }
 
+  val refill_cycles = tl.dataBits/mif.dataBits
   val llc_tag_leaf = Mem(Bits(width = 152), 512, seqRead = true)
   val llc_data_leaf = Mem(Bits(width = 64), 4096, seqRead = true)
-  val llc = Module(new DRAMSideLLC(sets=512, ways=8, outstanding=16, tagLeaf=llc_tag_leaf, dataLeaf=llc_data_leaf))
-  //val llc = Module(new DRAMSideLLCNull(NL2_REL_XACTS+NL2_ACQ_XACTS, REFILL_CYCLES))
+  val llc = Module(new DRAMSideLLC(sets=512, ways=8, outstanding=16, refill_cycles=refill_cycles, tagLeaf=llc_tag_leaf, dataLeaf=llc_data_leaf))
+  //val llc = Module(new DRAMSideLLCNull(NL2_REL_XACTS+NL2_ACQ_XACTS, refill_cycles))
   val mem_serdes = Module(new MemSerdes(htif_width))
 
   val masterEndpoints = (0 until ln.nMasters).map(i => Module(new L2CoherenceAgent(i)))
@@ -116,7 +123,7 @@ class OuterMemorySystem(htif_width: Int)(implicit conf: UncoreConfiguration) ext
     conv.io.uncached <> masterEndpoints.head.io.master
   }
   llc.io.cpu.req_cmd <> Queue(conv.io.mem.req_cmd)
-  llc.io.cpu.req_data <> Queue(conv.io.mem.req_data, REFILL_CYCLES)
+  llc.io.cpu.req_data <> Queue(conv.io.mem.req_data, refill_cycles)
   conv.io.mem.resp <> llc.io.cpu.resp
 
   // mux between main and backup memory ports
@@ -128,7 +135,7 @@ class OuterMemorySystem(htif_width: Int)(implicit conf: UncoreConfiguration) ext
   mem_serdes.io.wide.req_cmd.valid := mem_cmdq.io.deq.valid && io.mem_backup_en
   mem_serdes.io.wide.req_cmd.bits := mem_cmdq.io.deq.bits
 
-  val mem_dataq = Module(new Queue(new MemData, REFILL_CYCLES))
+  val mem_dataq = Module(new Queue(new MemData, refill_cycles))
   mem_dataq.io.enq <> llc.io.mem.req_data
   mem_dataq.io.deq.ready := Mux(io.mem_backup_en, mem_serdes.io.wide.req_data.ready, io.mem.req_data.ready)
   io.mem.req_data.valid := mem_dataq.io.deq.valid && !io.mem_backup_en
@@ -143,18 +150,18 @@ class OuterMemorySystem(htif_width: Int)(implicit conf: UncoreConfiguration) ext
   io.mem_backup <> mem_serdes.io.narrow
 }
 
-case class UncoreConfiguration(l2: L2CoherenceAgentConfiguration, tl: TileLinkConfiguration, nTiles: Int, nBanks: Int, bankIdLsb: Int, nSCR: Int)
+case class UncoreConfiguration(l2: L2CoherenceAgentConfiguration, tl: TileLinkConfiguration, mif: MemoryIFConfiguration, nTiles: Int, nBanks: Int, bankIdLsb: Int, nSCR: Int)
 
 class Uncore(htif_width: Int)(implicit conf: UncoreConfiguration) extends Module
 {
-  implicit val tl = conf.tl
+  implicit val (tl, mif) = (conf.tl, conf.mif)
   val io = new Bundle {
     val host = new HostIO(htif_width)
-    val mem = new ioMem
+    val mem = new MemIO
     val tiles = Vec.fill(conf.nTiles){new TileLinkIO}.flip
     val htif = Vec.fill(conf.nTiles){new HTIFIO(conf.nTiles)}.flip
     val incoherent = Vec.fill(conf.nTiles){Bool()}.asInput
-    val mem_backup = new ioMemSerialized(htif_width)
+    val mem_backup = new MemSerializedIO(htif_width)
     val mem_backup_en = Bool(INPUT)
   }
   val htif = Module(new HTIF(htif_width, CSRs.reset, conf.nSCR))
@@ -167,21 +174,15 @@ class Uncore(htif_width: Int)(implicit conf: UncoreConfiguration) extends Module
   // Add networking headers and endpoint queues
   def convertAddrToBank(addr: Bits): UInt = {
-    require(conf.bankIdLsb + log2Up(conf.nBanks) < MEM_ADDR_BITS, {println("Invalid bits for bank multiplexing.")})
+    require(conf.bankIdLsb + log2Up(conf.nBanks) < conf.mif.addrBits, {println("Invalid bits for bank multiplexing.")})
     addr(conf.bankIdLsb + log2Up(conf.nBanks) - 1, conf.bankIdLsb)
   }
 
   (outmemsys.io.tiles :+ outmemsys.io.htif).zip(io.tiles :+ htif.io.mem).zipWithIndex.map {
     case ((outer, client), i) =>
-      outer.acquire <> TileLinkHeaderAppender(client.acquire, i, conf.nBanks, convertAddrToBank _)
-      outer.release <> TileLinkHeaderAppender(client.release, i, conf.nBanks, convertAddrToBank _)
-
-      val grant_ack_q = Queue(client.grant_ack)
-      outer.grant_ack.valid := grant_ack_q.valid
-      outer.grant_ack.bits := grant_ack_q.bits
-      outer.grant_ack.bits.header.src := UInt(i)
-      grant_ack_q.ready := outer.grant_ack.ready
-
+      outer.acquire <> Queue(TileLinkHeaderOverwriter(client.acquire, i, conf.nBanks, convertAddrToBank _))
+      outer.release <> Queue(TileLinkHeaderOverwriter(client.release, i, conf.nBanks, convertAddrToBank _))
+      outer.grant_ack <> Queue(TileLinkHeaderOverwriter(client.grant_ack, i))
       client.grant <> Queue(outer.grant, 1, pipe = true)
      client.probe <> Queue(outer.probe)
   }
@@ -216,12 +217,12 @@ class Uncore(htif_width: Int)(implicit conf: UncoreConfiguration) extends Module
   io.host.debug_stats_pcr := htif.io.host.debug_stats_pcr
 }
 
-class TopIO(htifWidth: Int) extends Bundle {
+class TopIO(htifWidth: Int)(implicit conf: MemoryIFConfiguration) extends Bundle {
   val host = new HostIO(htifWidth)
-  val mem = new ioMem
+  val mem = new MemIO
 }
 
-class VLSITopIO(htifWidth: Int) extends TopIO(htifWidth) {
+class VLSITopIO(htifWidth: Int)(implicit conf: MemoryIFConfiguration) extends TopIO(htifWidth)(conf) {
   val mem_backup_en = Bool(INPUT)
   val in_mem_ready = Bool(OUTPUT)
   val in_mem_valid = Bool(INPUT)
@@ -231,14 +232,16 @@ class VLSITopIO(htifWidth: Int) extends TopIO(htifWidth) {
 
 class MemDessert extends Module {
+  implicit val mif = MemoryIFConfiguration(MEM_ADDR_BITS, MEM_DATA_BITS, MEM_TAG_BITS, MEM_DATA_BEATS)
   val io = new MemDesserIO(HTIF_WIDTH)
   val x = Module(new MemDesser(HTIF_WIDTH))
   io.narrow <> x.io.narrow
   io.wide <> x.io.wide
 }
 
+
 class Top extends Module {
-  val co = if(ENABLE_SHARING) {
+  val co = if(ENABLE_SHARING) {
     if(ENABLE_CLEAN_EXCLUSIVE) new MESICoherence
     else new MSICoherence
   } else {
@@ -247,14 +250,15 @@ class Top extends Module {
   }
 
   implicit val ln = LogicalNetworkConfiguration(log2Up(NTILES)+1, NBANKS, NTILES+1)
-  implicit val tl = TileLinkConfiguration(co, ln, log2Up(NL2_REL_XACTS+NL2_ACQ_XACTS), 2*log2Up(NMSHRS*NTILES+1), MEM_DATA_BITS)
+  implicit val tl = TileLinkConfiguration(co, ln, log2Up(NL2_REL_XACTS+NL2_ACQ_XACTS), 2*log2Up(NMSHRS*NTILES+1), CACHE_DATA_SIZE_IN_BYTES*8)
   implicit val l2 = L2CoherenceAgentConfiguration(tl, NL2_REL_XACTS, NL2_ACQ_XACTS)
-  implicit val uc = UncoreConfiguration(l2, tl, NTILES, NBANKS, bankIdLsb = 5, nSCR = 64)
+  implicit val mif = MemoryIFConfiguration(MEM_ADDR_BITS, MEM_DATA_BITS, MEM_TAG_BITS, MEM_DATA_BEATS)
+  implicit val uc = UncoreConfiguration(l2, tl, mif, NTILES, NBANKS, bankIdLsb = 5, nSCR = 64)
 
-  val ic = ICacheConfig(128, 2, ntlb = 8, nbtb = 38)
+  val ic = ICacheConfig(128, 2, ntlb = 8, nbtb = 38, tl = tl)
   val dc = DCacheConfig(128, 4, ntlb = 8,
-                        nmshr = NMSHRS, nrpq = 16, nsdq = 17, states = co.nClientStates)
-  val vic = ICacheConfig(128, 1)
+                        nmshr = NMSHRS, nrpq = 16, nsdq = 17, tl = tl)
+  val vic = ICacheConfig(128, 1, tl = tl)
   val hc = hwacha.HwachaConfiguration(vic, dc, 8, 256, ndtlb = 8, nptlb = 2)
   val fpu = if (HAS_FPU) Some(FPUConfig(sfmaLatency = 2, dfmaLatency = 3)) else None
   val rc = RocketConfiguration(tl, ic, dc, fpu
@@ -295,3 +299,5 @@ class Top extends Module {
   io.mem_backup_en <> uncore.io.mem_backup_en
   io.mem <> uncore.io.mem
 }
+
+
diff --git a/src/main/scala/fpga.scala b/src/main/scala/fpga.scala
index f454fe0a..28aedc33 100644
--- a/src/main/scala/fpga.scala
+++ b/src/main/scala/fpga.scala
@@ -4,15 +4,17 @@ import Chisel._
 import Node._
 import uncore._
 import rocket._
+import DRAMModel._
+import DRAMModel.MemModelConstants._
 
 class FPGAOuterMemorySystem(htif_width: Int)(implicit conf: UncoreConfiguration) extends Module
 {
-  implicit val (tl, ln, l2) = (conf.tl, conf.tl.ln, conf.l2)
+  implicit val (tl, ln, l2, mif) = (conf.tl, conf.tl.ln, conf.l2, conf.mif)
   val io = new Bundle {
     val tiles = Vec.fill(conf.nTiles){new TileLinkIO}.flip
     val htif = (new TileLinkIO).flip
     val incoherent = Vec.fill(ln.nClients){Bool()}.asInput
-    val mem = new ioMem
+    val mem = new MemIO
   }
 
   val masterEndpoints = (0 until ln.nMasters).map(i => Module(new L2CoherenceAgent(i)))
@@ -31,16 +33,16 @@ class FPGAOuterMemorySystem(htif_width: Int)(implicit conf: UncoreConfiguration)
     conv.io.uncached <> masterEndpoints.head.io.master
   }
   io.mem.req_cmd <> Queue(conv.io.mem.req_cmd)
-  io.mem.req_data <> Queue(conv.io.mem.req_data, REFILL_CYCLES)
+  io.mem.req_data <> Queue(conv.io.mem.req_data, tl.dataBits/mif.dataBits)
   conv.io.mem.resp <> Queue(io.mem.resp)
 }
 
 class FPGAUncore(htif_width: Int)(implicit conf: UncoreConfiguration) extends Module
 {
-  implicit val (tl, ln) = (conf.tl, conf.tl.ln)
+  implicit val (tl, ln, mif) = (conf.tl, conf.tl.ln, conf.mif)
   val io = new Bundle {
     val host = new HostIO(htif_width)
-    val mem = new ioMem
+    val mem = new MemIO
     val tiles = Vec.fill(conf.nTiles){new TileLinkIO}.flip
     val htif = Vec.fill(conf.nTiles){new HTIFIO(conf.nTiles)}.flip
     val incoherent = Vec.fill(conf.nTiles){Bool()}.asInput
@@ -54,21 +56,15 @@ class FPGAUncore(htif_width: Int)(implicit conf: UncoreConfiguration) extends Mo
   // Add networking headers and endpoint queues
   def convertAddrToBank(addr: Bits): UInt = {
-    require(conf.bankIdLsb + log2Up(conf.nBanks) < MEM_ADDR_BITS, {println("Invalid bits for bank multiplexing.")})
+    require(conf.bankIdLsb + log2Up(conf.nBanks) < conf.mif.addrBits, {println("Invalid bits for bank multiplexing.")})
     addr(conf.bankIdLsb + log2Up(conf.nBanks) - 1, conf.bankIdLsb)
   }
 
   (outmemsys.io.tiles :+ outmemsys.io.htif).zip(io.tiles :+ htif.io.mem).zipWithIndex.map {
     case ((outer, client), i) =>
-      outer.acquire <> TileLinkHeaderAppender(client.acquire, i, conf.nBanks, convertAddrToBank _)
-      outer.release <> TileLinkHeaderAppender(client.release, i, conf.nBanks, convertAddrToBank _)
-
-      val grant_ack_q = Queue(client.grant_ack)
-      outer.grant_ack.valid := grant_ack_q.valid
-      outer.grant_ack.bits := grant_ack_q.bits
-      outer.grant_ack.bits.header.src := UInt(i)
-      grant_ack_q.ready := outer.grant_ack.ready
-
+      outer.acquire <> Queue(TileLinkHeaderOverwriter(client.acquire, i, conf.nBanks, convertAddrToBank _))
+      outer.release <> Queue(TileLinkHeaderOverwriter(client.release, i, conf.nBanks, convertAddrToBank _))
+      outer.grant_ack <> Queue(TileLinkHeaderOverwriter(client.release, i))
      client.grant <> Queue(outer.grant, 1, pipe = true)
      client.probe <> Queue(outer.probe)
   }
@@ -77,26 +73,32 @@ class FPGAUncore(htif_width: Int)(implicit conf: UncoreConfiguration) extends Mo
   htif.io.host.in <> io.host.in
 }
 
-class FPGATopIO(htifWidth: Int) extends TopIO(htifWidth)
-
-class FPGATop extends Module {
-  val htif_width = 16
+class ReferenceChip(htif_width: Int)(implicit mif: MemoryIFConfiguration) extends Module {
+  val io = new Bundle {
+    val host_in = new DecoupledIO(new HostPacket(htif_width)).flip()
+    val host_out = new DecoupledIO(new HostPacket(htif_width))
+    val host_clk = Bool(OUTPUT)
+    val host_clk_edge = Bool(OUTPUT)
+    val host_debug_stats_pcr = Bool(OUTPUT)
+    val mem_req_cmd = new DecoupledIO(new MemReqCmd())
+    val mem_req_data = new DecoupledIO(new MemData())
+    val mem_resp = (new DecoupledIO(new MemResp())).flip()
+  }
+
   val co = new MESICoherence
   val ntiles = 1
   val nbanks = 1
   val nmshrs = 2
   implicit val ln = LogicalNetworkConfiguration(log2Up(ntiles)+1, nbanks, ntiles+1)
-  implicit val tl = TileLinkConfiguration(co, ln, log2Up(1+8), 2*log2Up(nmshrs*ntiles+1), MEM_DATA_BITS)
+  implicit val tl = TileLinkConfiguration(co, ln, log2Up(1+8), 2*log2Up(nmshrs*ntiles+1), CACHE_DATA_SIZE_IN_BYTES*8)
   implicit val l2 = L2CoherenceAgentConfiguration(tl, 1, 8)
-  implicit val uc = UncoreConfiguration(l2, tl, ntiles, nbanks, bankIdLsb = 5, nSCR = 64)
+  implicit val uc = UncoreConfiguration(l2, tl, mif, ntiles, nbanks, bankIdLsb = 5, nSCR = 64)
 
-  val ic = ICacheConfig(64, 1, ntlb = 4, nbtb = 4)
-  val dc = DCacheConfig(64, 1, ntlb = 4, nmshr = 2, nrpq = 16, nsdq = 17, states = co.nClientStates)
+  val ic = ICacheConfig(64, 1, ntlb = 4, nbtb = 4, tl = tl)
+  val dc = DCacheConfig(64, 1, ntlb = 4, nmshr = 2, nrpq = 16, nsdq = 17, tl = tl)
   val rc = RocketConfiguration(tl, ic, dc, fpu = None, fastMulDiv = false)
 
-  val io = new FPGATopIO(htif_width)
-
   val resetSigs = Vec.fill(uc.nTiles){Bool()}
   val tileList = (0 until uc.nTiles).map(r => Module(new Tile(resetSignal = resetSigs(r))(rc)))
   val uncore = Module(new FPGAUncore(htif_width))
@@ -118,9 +120,93 @@
     hl.ipi_req <> Queue(tile.io.host.ipi_req)
     tile.io.host.ipi_rep <> Queue(hl.ipi_rep)
   }
+
+  io.host_in.ready := uncore.io.host.in.ready
+  uncore.io.host.in.bits := io.host_in.bits.data
+  uncore.io.host.in.valid := io.host_in.valid
+
+  uncore.io.host.out.ready := io.host_out.ready
+  io.host_out.bits.data := uncore.io.host.out.bits
+  io.host_out.valid := uncore.io.host.out.valid
+
+  io.host_clk := uncore.io.host.clk
+  io.host_clk_edge := uncore.io.host.clk_edge
+  io.host_debug_stats_pcr := uncore.io.host.debug_stats_pcr
 
-  io.host <> uncore.io.host
-  io.mem <> uncore.io.mem
+  io.mem_req_cmd <> uncore.io.mem.req_cmd
+  io.mem_req_data <> uncore.io.mem.req_data
+  io.mem_resp <> uncore.io.mem.resp
+}
+
+
+class FPGATopIO(htifWidth: Int)(implicit conf: MemoryIFConfiguration) extends TopIO(htifWidth)(conf)
+
+class FPGATop extends Module {
+  val htif_width = 16
+
+  implicit val mif = MemoryIFConfiguration(PADDR_BITS - OFFSET_BITS, 128, 5, 4)
+  val deviceWidth = ROW_WIDTH/mif.dataBits
+  implicit val mc = MemoryControllerConfiguration(deviceWidth, (if(deviceWidth == 4) 0 else log2Up(deviceWidth/4)), mif)
+
+  val io = new FPGATopIO(htif_width)
+
+  val referenceChip = Module(new Fame1Wrapper(new ReferenceChip(htif_width)))
+  val dramModel = Module(new DRAMSystemWrapper())
+  //dram model parameters setup
+  dramModel.io.params.tRAS := UInt(4)
+  dramModel.io.params.tRCD := UInt(4)
+  dramModel.io.params.tRP := UInt(4)
+  dramModel.io.params.tCCD := UInt(4)
+  dramModel.io.params.tRTP := UInt(4)
+  dramModel.io.params.tWTR := UInt(4)
+  dramModel.io.params.tWR := UInt(4)
+  dramModel.io.params.tRRD := UInt(4)
+
+  //host to reference chip connections
+  referenceChip.DecoupledIOs("host_in").host_valid := Bool(true)
+  referenceChip.DecoupledIOs("host_in").target.bits := io.host.in.bits
+  referenceChip.DecoupledIOs("host_in").target.valid := io.host.in.valid
+  io.host.in.ready := referenceChip.DecoupledIOs("host_in").host_ready && referenceChip.DecoupledIOs("host_in").target.ready
+
+  io.host.out.valid := referenceChip.DecoupledIOs("host_out").host_valid && referenceChip.DecoupledIOs("host_out").target.valid
+  io.host.out.bits := referenceChip.DecoupledIOs("host_out").target.bits
+  referenceChip.DecoupledIOs("host_out").target.ready := io.host.out.ready
+  referenceChip.DecoupledIOs("host_out").host_ready := Bool(true)
+
+  io.host.clk := referenceChip.DebugIOs("host_clk")
+  io.host.clk_edge := referenceChip.DebugIOs("host_clk_edge")
+  io.host.debug_stats_pcr := referenceChip.DebugIOs("host_debug_stats_pcr")
+
+  //reference chip to dram model connections
+  val mem_req_cmd_queue = Module(new FameQueue(8)(new MemReqCmd()))
+  val mem_req_data_queue = Module(new FameQueue(8)(new MemData()))
+  val mem_resp_queue = Module(new FameQueue(8)(new MemResp()))
+
+  //cmd queue
+  FameDecoupledIO.connect(referenceChip.DecoupledIOs("mem_req_cmd"), mem_req_cmd_queue.io.enq, new MemReqCmd)
+  mem_req_cmd_queue.io.deq <> dramModel.io.memReqCmd
+
+  //data queue
+  FameDecoupledIO.connect(referenceChip.DecoupledIOs("mem_req_data"), mem_req_data_queue.io.enq, new MemData)
+  mem_req_data_queue.io.deq <> dramModel.io.memReqData
+
+  //resp queue
+  mem_resp_queue.io.enq <> dramModel.io.memResp
+  FameDecoupledIO.connect(referenceChip.DecoupledIOs("mem_resp"), mem_resp_queue.io.deq, new MemResp)
+
+  //dram model to outside memory connections
+  val host_mem_cmd_queue = Module(new Queue(new MemReqCmd, 2))
+  val host_mem_data_queue = Module(new Queue(new MemData, mif.dataBeats))
+  val host_mem_resp_queue = Module(new Queue(new MemResp, mif.dataBeats))
+
+  host_mem_cmd_queue.io.enq <> dramModel.io.mem.req_cmd
+  host_mem_cmd_queue.io.deq <> io.mem.req_cmd
+
+  host_mem_data_queue.io.enq <> dramModel.io.mem.req_data
+  host_mem_data_queue.io.deq <> io.mem.req_data
+
+  host_mem_resp_queue.io.enq <> io.mem.resp
+  host_mem_resp_queue.io.deq <> dramModel.io.mem.resp
 }
 
 abstract class AXISlave extends Module {
@@ -174,7 +260,7 @@ class Slave extends AXISlave
   // write cr1 -> mem.resp (nonblocking)
   val in_count = Reg(init=UInt(0, log2Up(memw/dw)))
-  val rf_count = Reg(init=UInt(0, log2Up(REFILL_CYCLES)))
+  val rf_count = Reg(init=UInt(0, log2Up(CACHE_DATA_SIZE_IN_BYTES*8/memw)))
   require(memw % dw == 0 && isPow2(memw/dw))
   val in_reg = Reg(top.io.mem.resp.bits.data)
 top.io.mem.resp.bits.data := Cat(io.in.bits, in_reg(in_reg.getWidth-1,dw))
diff --git a/src/main/scala/network.scala b/src/main/scala/network.scala
index 109d4a2b..57d960a1 100644
--- a/src/main/scala/network.scala
+++ b/src/main/scala/network.scala
@@ -5,115 +5,42 @@ import uncore._
 import scala.reflect._
 import scala.reflect.runtime.universe._
 
-object TileLinkHeaderAppender {
-  def apply[T <: ClientSourcedMessage with HasPhysicalAddress, U <: ClientSourcedMessage with HasTileLinkData](in: PairedDataIO[LogicalNetworkIO[T],LogicalNetworkIO[U]], clientId: Int, nBanks: Int, addrConvert: Bits => UInt)(implicit conf: TileLinkConfiguration) = {
-    val shim = Module(new TileLinkHeaderAppender(in.meta.bits.payload, in.data.bits.payload, clientId, nBanks, addrConvert))
-    shim.io.in <> in
-    shim.io.out
+object TileLinkHeaderOverwriter {
+  def apply[T <: ClientSourcedMessage](in: DecoupledIO[LogicalNetworkIO[T]], clientId: Int)(implicit conf: TileLinkConfiguration): DecoupledIO[LogicalNetworkIO[T]] = {
+    val out = in.clone.asDirectionless
+    out.bits.payload := in.bits.payload
+    out.bits.header.src := UInt(clientId)
+    out.bits.header.dst := in.bits.header.dst
+    out.valid := in.valid
+    in.ready := out.ready
+    out
   }
-  def apply[T <: ClientSourcedMessage with HasPhysicalAddress](in: DecoupledIO[LogicalNetworkIO[T]], clientId: Int, nBanks: Int, addrConvert: Bits => UInt)(implicit conf: TileLinkConfiguration) = {
-    val shim = Module(new TileLinkHeaderAppender(in.bits.payload.clone, new AcquireData, clientId, nBanks, addrConvert))
-    shim.io.in.meta <> in
-    shim.io.out.meta
+  def apply[T <: ClientSourcedMessage with HasPhysicalAddress](in: DecoupledIO[LogicalNetworkIO[T]], clientId: Int, nBanks: Int, addrConvert: UInt => UInt)(implicit conf: TileLinkConfiguration): DecoupledIO[LogicalNetworkIO[T]] = {
+    val out: DecoupledIO[LogicalNetworkIO[T]] = apply(in, clientId)
+    out.bits.header.dst := (if(nBanks > 1) addrConvert(in.bits.payload.addr) else UInt(0))
+    out
   }
 }
 
-class TileLinkHeaderAppender[T <: ClientSourcedMessage with HasPhysicalAddress, U <: ClientSourcedMessage with HasTileLinkData](mType: T, dType: U, clientId: Int, nBanks: Int, addrConvert: Bits => UInt)(implicit conf: TileLinkConfiguration) extends Module {
-  implicit val ln = conf.ln
-  val io = new Bundle {
-    val in = new PairedDataIO(new LogicalNetworkIO(mType), new LogicalNetworkIO(dType)).flip
-    val out = new PairedDataIO(new LogicalNetworkIO(mType), new LogicalNetworkIO(dType))
-  }
-
-  val meta_q = Queue(io.in.meta)
-  val data_q = Queue(io.in.data)
-  if(nBanks == 1) {
-    io.out.meta.bits.payload := meta_q.bits.payload
-    io.out.meta.bits.header.src := UInt(clientId)
-    io.out.meta.bits.header.dst := UInt(0)
-    io.out.meta.valid := meta_q.valid
-    meta_q.ready := io.out.meta.ready
-    io.out.data.bits.payload := data_q.bits.payload
-    io.out.data.bits.header.src := UInt(clientId)
-    io.out.data.bits.header.dst := UInt(0)
-    io.out.data.valid := data_q.valid
-    data_q.ready := io.out.data.ready
-  } else {
-    val meta_has_data = conf.co.messageHasData(meta_q.bits.payload)
-    val addr_q = Module(new Queue(io.in.meta.bits.payload.addr.clone, 2, pipe = true, flow = true))
-    val data_cnt = Reg(init=UInt(0, width = log2Up(REFILL_CYCLES)))
-    val data_cnt_up = data_cnt + UInt(1)
-
-    io.out.meta.bits.payload := meta_q.bits.payload
-    io.out.meta.bits.header.src := UInt(clientId)
-    io.out.meta.bits.header.dst := addrConvert(meta_q.bits.payload.addr)
-    io.out.data.bits.payload := meta_q.bits.payload
-    io.out.data.bits.header.src := UInt(clientId)
-    io.out.data.bits.header.dst := addrConvert(addr_q.io.deq.bits)
-    addr_q.io.enq.bits := meta_q.bits.payload.addr
-
-    io.out.meta.valid := meta_q.valid && addr_q.io.enq.ready
-    meta_q.ready := io.out.meta.ready && addr_q.io.enq.ready
-    io.out.data.valid := data_q.valid && addr_q.io.deq.valid
-    data_q.ready := io.out.data.ready && addr_q.io.deq.valid
-    addr_q.io.enq.valid := meta_q.valid && io.out.meta.ready && meta_has_data
-    addr_q.io.deq.ready := Bool(false)
-
-    when(data_q.valid && data_q.ready) {
-      data_cnt := data_cnt_up
-      when(data_cnt_up === UInt(0)) {
-        addr_q.io.deq.ready := Bool(true)
-      }
-    }
-  }
-}
-
-//Adapter betweewn an UncachedTileLinkIO and a mem controller MemIO
-class MemIOUncachedTileLinkIOConverter(qDepth: Int)(implicit conf: TileLinkConfiguration) extends Module {
-  val io = new Bundle {
-    val uncached = new UncachedTileLinkIO().flip
-    val mem = new ioMem
-  }
-  val mem_cmd_q = Module(new Queue(new MemReqCmd, qDepth))
-  val mem_data_q = Module(new Queue(new MemData, qDepth))
-  mem_cmd_q.io.enq.valid := io.uncached.acquire.meta.valid
-  io.uncached.acquire.meta.ready := mem_cmd_q.io.enq.ready
-  mem_cmd_q.io.enq.bits.rw := conf.co.needsOuterWrite(io.uncached.acquire.meta.bits.payload.a_type, UInt(0))
-  mem_cmd_q.io.enq.bits.tag := io.uncached.acquire.meta.bits.payload.client_xact_id
-  mem_cmd_q.io.enq.bits.addr := io.uncached.acquire.meta.bits.payload.addr
-  mem_data_q.io.enq.valid := io.uncached.acquire.data.valid
-  io.uncached.acquire.data.ready := mem_data_q.io.enq.ready
-  mem_data_q.io.enq.bits.data := io.uncached.acquire.data.bits.payload.data
-  io.uncached.grant.valid := io.mem.resp.valid
-  io.mem.resp.ready := io.uncached.grant.ready
-  io.uncached.grant.bits.payload.data := io.mem.resp.bits.data
-  io.uncached.grant.bits.payload.client_xact_id := io.mem.resp.bits.tag
-  io.uncached.grant.bits.payload.master_xact_id := UInt(0) // DNC
-  io.uncached.grant.bits.payload.g_type := UInt(0) // DNC
-  io.mem.req_cmd <> mem_cmd_q.io.deq
-  io.mem.req_data <> mem_data_q.io.deq
-}
-
 class ReferenceChipCrossbarNetwork(implicit conf: UncoreConfiguration) extends LogicalNetwork[TileLinkIO]()(conf.tl.ln) {
   implicit val (tl, ln, co) = (conf.tl, conf.tl.ln, conf.tl.co)
   val io = new Bundle {
     val clients = Vec.fill(ln.nClients){(new TileLinkIO).flip}
     val masters = Vec.fill(ln.nMasters){new TileLinkIO}
   }
+
   implicit val pconf = new PhysicalNetworkConfiguration(ln.nEndpoints, ln.idBits) // Same config for all networks
 
   // Actually instantiate the particular networks required for TileLink
-  val acqNet = Module(new PairedCrossbar(new Acquire, new AcquireData, REFILL_CYCLES, (acq: PhysicalNetworkIO[Acquire]) => co.messageHasData(acq.payload)))
-  val relNet = Module(new PairedCrossbar(new Release, new ReleaseData, REFILL_CYCLES, (rel: PhysicalNetworkIO[Release]) => co.messageHasData(rel.payload)))
-  val probeNet = Module(new BasicCrossbar(new Probe))
-  val grantNet = Module(new BasicCrossbar(new Grant))
+  val acqNet = Module(new BasicCrossbar(new Acquire))
+  val relNet = Module(new BasicCrossbar(new Release))
+  val prbNet = Module(new BasicCrossbar(new Probe))
+  val gntNet = Module(new BasicCrossbar(new Grant))
   val ackNet = Module(new BasicCrossbar(new GrantAck))
 
   // Aliases for the various network IO bundle types
   type FBCIO[T <: Data] = DecoupledIO[PhysicalNetworkIO[T]]
   type FLNIO[T <: Data] = DecoupledIO[LogicalNetworkIO[T]]
-  type PBCIO[M <: Data, D <: Data] = PairedDataIO[PhysicalNetworkIO[M], PhysicalNetworkIO[D]]
-  type PLNIO[M <: Data, D <: Data] = PairedDataIO[LogicalNetworkIO[M], LogicalNetworkIO[D]]
   type FromCrossbar[T <: Data] = FBCIO[T] => FLNIO[T]
   type ToCrossbar[T <: Data] = FLNIO[T] => FBCIO[T]
@@ -176,10 +103,10 @@ class ReferenceChipCrossbarNetwork(implicit conf: UncoreConfiguration) extends L
   def doFIFOHookup[T <: Data](isEndpointSourceOfMessage: Boolean, physIn: FBCIO[T], physOut: FBCIO[T], logIO: FLNIO[T], inShim: ToCrossbar[T], outShim: FromCrossbar[T]) = {
     if(isEndpointSourceOfMessage) doFIFOInputHookup(physIn, physOut, logIO, inShim)
-    else doFIFOOutputHookup(physIn, physOut, logIO, outShim)
+    else doFIFOOutputHookup(physIn, physOut, logIO, outShim)
   }
 
-  //Hookup all instances of a particular subbundle of
+  //Hookup all instances of a particular subbundle of TileLink
   def doFIFOHookups[T <: Data: TypeTag](physIO: BasicCrossbarIO[T], getLogIO: TileLinkIO => FLNIO[T]) = {
     typeTag[T].tpe match{
       case t if t <:< typeTag[ClientSourcedMessage].tpe => {
@@ -194,33 +121,9 @@ class ReferenceChipCrossbarNetwork(implicit conf: UncoreConfiguration) extends L
     }
   }
 
-  def doPairedDataHookup[T <: Data, R <: Data](isEndpointSourceOfMessage: Boolean, physIn: PBCIO[T,R], physOut: PBCIO[T,R], logIO: PLNIO[T,R], inShim: ToCrossbar[T], outShim: FromCrossbar[T], inShimD: ToCrossbar[R], outShimD: FromCrossbar[R]) = {
-    if(isEndpointSourceOfMessage) {
-      doFIFOInputHookup[T](physIn.meta, physOut.meta, logIO.meta, inShim)
-      doFIFOInputHookup[R](physIn.data, physOut.data, logIO.data, inShimD)
-    } else {
-      doFIFOOutputHookup[T](physIn.meta, physOut.meta, logIO.meta, outShim)
-      doFIFOOutputHookup[R](physIn.data, physOut.data, logIO.data, outShimD)
-    }
-  }
-
-  def doPairedDataHookups[T <: Data: TypeTag, R <: Data](physIO: PairedCrossbarIO[T,R], getLogIO: TileLinkIO => PLNIO[T,R]) = {
-    typeTag[T].tpe match{
-      case t if t <:< typeTag[ClientSourcedMessage].tpe => {
-        io.masters.zipWithIndex.map{ case (i, id) => doPairedDataHookup[T,R](false, physIO.in(id), physIO.out(id), getLogIO(i), ClientToCrossbarShim, CrossbarToMasterShim, ClientToCrossbarShim, CrossbarToMasterShim) }
-        io.clients.zipWithIndex.map{ case (i, id) => doPairedDataHookup[T,R](true, physIO.in(id+ln.nMasters), physIO.out(id+ln.nMasters), getLogIO(i), ClientToCrossbarShim, CrossbarToMasterShim, ClientToCrossbarShim, CrossbarToMasterShim) }
-      }
-      case t if t <:< typeTag[MasterSourcedMessage].tpe => {
-        io.masters.zipWithIndex.map{ case (i, id) => doPairedDataHookup[T,R](true, physIO.in(id), physIO.out(id), getLogIO(i), MasterToCrossbarShim, CrossbarToClientShim, MasterToCrossbarShim, CrossbarToClientShim) }
-        io.clients.zipWithIndex.map{ case (i, id) => doPairedDataHookup[T,R](false, physIO.in(id+ln.nMasters), physIO.out(id+ln.nMasters), getLogIO(i), MasterToCrossbarShim, CrossbarToClientShim, MasterToCrossbarShim, CrossbarToClientShim) }
-      }
-      case _ => require(false, "Unknown message sourcing.")
-    }
-  }
-
-  doPairedDataHookups(acqNet.io, (tl: TileLinkIO) => tl.acquire)
-  doPairedDataHookups(relNet.io, (tl: TileLinkIO) => tl.release)
-  doFIFOHookups(probeNet.io, (tl: TileLinkIO) => tl.probe)
-  doFIFOHookups(grantNet.io, (tl: TileLinkIO) => tl.grant)
+  doFIFOHookups(acqNet.io, (tl: TileLinkIO) => tl.acquire)
+  doFIFOHookups(relNet.io, (tl: TileLinkIO) => tl.release)
+  doFIFOHookups(prbNet.io, (tl: TileLinkIO) => tl.probe)
+  doFIFOHookups(gntNet.io, (tl: TileLinkIO) => tl.grant)
   doFIFOHookups(ackNet.io, (tl: TileLinkIO) => tl.grant_ack)
 }
diff --git a/uncore b/uncore
index 803308e9..67589ccc 160000
--- a/uncore
+++ b/uncore
@@ -1 +1 @@
-Subproject commit 803308e917397776444c3e3696fbfacad709a1db
+Subproject commit 67589ccca64716a9be5ab94d50854a8827431be7
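
For reference, the relationship between the new DummyTopLevelConstants memory parameters and the per-configuration refill_cycles value that replaces the global REFILL_CYCLES throughout this patch can be checked with a few lines of plain Scala. The sketch below is not part of the patch; the value CACHE_DATA_SIZE_IN_BYTES = 64 (a 512-bit TileLink data channel) and the object name MemBeatsSketch are assumptions for illustration, inferred from MEM_DATA_BITS = 128 and MEM_DATA_BEATS = 4 rather than stated in the diff.

// Stand-alone sanity check of the beat arithmetic used by the patch.
// cacheDataBytes models CACHE_DATA_SIZE_IN_BYTES, which is assumed here;
// the other numbers come from DummyTopLevelConstants above.
object MemBeatsSketch extends App {
  val cacheDataBytes = 64                  // assumed CACHE_DATA_SIZE_IN_BYTES
  val tlDataBits     = cacheDataBytes * 8  // TileLinkConfiguration(..., CACHE_DATA_SIZE_IN_BYTES*8)
  val memDataBits    = 128                 // MEM_DATA_BITS
  val memDataBeats   = 4                   // MEM_DATA_BEATS

  // The patch computes refill_cycles = tl.dataBits/mif.dataBits instead of
  // relying on a global REFILL_CYCLES constant.
  val refillCycles = tlDataBits / memDataBits

  assert(refillCycles == memDataBeats,
    s"refill_cycles ($refillCycles) should equal MEM_DATA_BEATS ($memDataBeats)")
  println(s"refill_cycles = $refillCycles beats per cache line")
}

Under these assumptions the derived beat count (4) matches MEM_DATA_BEATS, which is why queues such as mem_dataq and host_mem_data_queue can be sized with either expression.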