From eaa4b04ee51a2cee323080fe06da472c05e55499 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 7 Sep 2016 18:29:41 -0700 Subject: [PATCH 1/3] Check D$ store->load collisions more precisely Tolerate, for example, a half-word store and a half-word load to different halves of the same word. --- src/main/scala/rocket/dcache.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/scala/rocket/dcache.scala b/src/main/scala/rocket/dcache.scala index 5f313f74..b7859881 100644 --- a/src/main/scala/rocket/dcache.scala +++ b/src/main/scala/rocket/dcache.scala @@ -173,9 +173,9 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true } // exceptions - val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned - io.cpu.xcpt.ma.ld := s1_read && misaligned - io.cpu.xcpt.ma.st := s1_write && misaligned + val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes) + io.cpu.xcpt.ma.ld := s1_read && s1_storegen.misaligned + io.cpu.xcpt.ma.st := s1_write && s1_storegen.misaligned io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st @@ -232,8 +232,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // store->load RAW hazard detection val s1_idx = s1_req.addr(idxMSB, wordOffBits) val s1_raw_hazard = s1_read && - ((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx) || - (pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx)) + ((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_storegen.mask & s1_storegen.mask).orR) || + (pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx && (pstore2_storegen_mask & s1_storegen.mask).orR)) when (s1_valid && s1_raw_hazard) { s1_nack := true } metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty) From 656aa78f7df0d43f9fe650334823f6c844c1c00c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 8 Sep 2016 21:27:28 -0700 Subject: [PATCH 2/3] Pipeline FMAs more deeply by default Rocket's QoR has improved enough that the FMAs are on the critical path. This change seems to keep the integer pipeline's logic paths balanced with the FPU. --- src/main/scala/rocket/fpu.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/rocket/fpu.scala b/src/main/scala/rocket/fpu.scala index d36239b5..a75eac83 100644 --- a/src/main/scala/rocket/fpu.scala +++ b/src/main/scala/rocket/fpu.scala @@ -12,8 +12,8 @@ import cde.{Parameters, Field} case class FPUConfig( divSqrt: Boolean = true, - sfmaLatency: Int = 2, - dfmaLatency: Int = 3 + sfmaLatency: Int = 3, + dfmaLatency: Int = 4 ) object FPConstants From cf3c6fa277cccb5cd2464ff1928525747fb287fb Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Fri, 9 Sep 2016 10:57:10 -0700 Subject: [PATCH 3/3] add STOP_COND to emulator & match vsim PRINTF_COND --- csrc/emulator.cc | 2 ++ csrc/verilator.h | 1 + emulator/Makefrag-verilator | 4 +++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/csrc/emulator.cc b/csrc/emulator.cc index 264e8d48..a6b04de1 100644 --- a/csrc/emulator.cc +++ b/csrc/emulator.cc @@ -15,6 +15,7 @@ extern dtm_t* dtm; static uint64_t trace_count = 0; bool verbose; +bool done_reset; void handle_sigterm(int sig) { @@ -89,6 +90,7 @@ int main(int argc, char** argv) tile->eval(); tile->reset = 0; } + done_reset = true; while (!dtm->done() && !tile->io_success && trace_count < max_cycles) { tile->clk = 0; diff --git a/csrc/verilator.h b/csrc/verilator.h index 3dfc0672..b4cd0659 100644 --- a/csrc/verilator.h +++ b/csrc/verilator.h @@ -6,6 +6,7 @@ #include extern bool verbose; +extern bool done_reset; class VerilatedVcdFILE : public VerilatedVcdFile { public: diff --git a/emulator/Makefrag-verilator b/emulator/Makefrag-verilator index a31d2819..2cf13f33 100644 --- a/emulator/Makefrag-verilator +++ b/emulator/Makefrag-verilator @@ -49,7 +49,9 @@ verilator/verilator-$(VERILATOR_VERSION).tar.gz: # Run Verilator to produce a fast binary to emulate this circuit. VERILATOR := $(INSTALLED_VERILATOR) --cc --exe -VERILATOR_FLAGS := --top-module $(MODEL) +define+PRINTF_COND=\$$c\(\"verbose\"\) --assert \ +VERILATOR_FLAGS := --top-module $(MODEL) \ + +define+PRINTF_COND=\$$c\(\"verbose\",\"\&\&\"\,\"done_reset\"\) \ + +define+STOP_COND=\$$c\(\"done_reset\"\) --assert \ -Wno-STMTDLY --x-assign unique \ -I$(base_dir)/vsrc \ -O3 -CFLAGS "$(CXXFLAGS) -DVERILATOR -include $(base_dir)/csrc/verilator.h"