Merge pull request #269 from ucb-bar/tweaks
Two tweaks to Rocket performance and QoR (quality of results)
This commit is contained in:
		| @@ -15,6 +15,7 @@ | ||||
| extern dtm_t* dtm; | ||||
| static uint64_t trace_count = 0; | ||||
| bool verbose; | ||||
| bool done_reset; | ||||
|  | ||||
| void handle_sigterm(int sig) | ||||
| { | ||||
| @@ -89,6 +90,7 @@ int main(int argc, char** argv) | ||||
|     tile->eval(); | ||||
|     tile->reset = 0; | ||||
|   } | ||||
|   done_reset = true; | ||||
|  | ||||
|   while (!dtm->done() && !tile->io_success && trace_count < max_cycles) { | ||||
|     tile->clk = 0; | ||||
|   | ||||
| @@ -6,6 +6,7 @@ | ||||
| #include <stdio.h> | ||||
|  | ||||
| extern bool verbose; | ||||
| extern bool done_reset; | ||||
|  | ||||
| class VerilatedVcdFILE : public VerilatedVcdFile { | ||||
|  public: | ||||
|   | ||||
| @@ -49,7 +49,9 @@ verilator/verilator-$(VERILATOR_VERSION).tar.gz: | ||||
|  | ||||
| # Run Verilator to produce a fast binary to emulate this circuit. | ||||
| VERILATOR := $(INSTALLED_VERILATOR) --cc --exe | ||||
| VERILATOR_FLAGS := --top-module $(MODEL) +define+PRINTF_COND=\$$c\(\"verbose\"\) --assert \ | ||||
| VERILATOR_FLAGS := --top-module $(MODEL) \ | ||||
|   +define+PRINTF_COND=\$$c\(\"verbose\",\"\&\&\"\,\"done_reset\"\) \ | ||||
|   +define+STOP_COND=\$$c\(\"done_reset\"\) --assert \ | ||||
| 	-Wno-STMTDLY --x-assign unique \ | ||||
|   -I$(base_dir)/vsrc \ | ||||
|   -O3 -CFLAGS "$(CXXFLAGS) -DVERILATOR -include $(base_dir)/csrc/verilator.h" | ||||
|   | ||||
| @@ -173,9 +173,9 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { | ||||
|   when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true } | ||||
|  | ||||
|   // exceptions | ||||
|   val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned | ||||
|   io.cpu.xcpt.ma.ld := s1_read && misaligned | ||||
|   io.cpu.xcpt.ma.st := s1_write && misaligned | ||||
|   val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes) | ||||
|   io.cpu.xcpt.ma.ld := s1_read && s1_storegen.misaligned | ||||
|   io.cpu.xcpt.ma.st := s1_write && s1_storegen.misaligned | ||||
|   io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld | ||||
|   io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st | ||||
|  | ||||
| @@ -232,8 +232,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { | ||||
|   // store->load RAW hazard detection | ||||
|   val s1_idx = s1_req.addr(idxMSB, wordOffBits) | ||||
|   val s1_raw_hazard = s1_read && | ||||
|     ((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx) || | ||||
|      (pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx)) | ||||
|     ((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_storegen.mask & s1_storegen.mask).orR) || | ||||
|      (pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx && (pstore2_storegen_mask & s1_storegen.mask).orR)) | ||||
|   when (s1_valid && s1_raw_hazard) { s1_nack := true } | ||||
|  | ||||
|   metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty) | ||||
|   | ||||
| @@ -12,8 +12,8 @@ import cde.{Parameters, Field} | ||||
|  | ||||
| case class FPUConfig( | ||||
|   divSqrt: Boolean = true, | ||||
|   sfmaLatency: Int = 2, | ||||
|   dfmaLatency: Int = 3 | ||||
|   sfmaLatency: Int = 3, | ||||
|   dfmaLatency: Int = 4 | ||||
| ) | ||||
|  | ||||
| object FPConstants | ||||
|   | ||||
		Reference in New Issue
	
	Block a user