1
0

Merge pull request #269 from ucb-bar/tweaks

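Two tweaks to Rocket performance and QoR (quality of results). In the diff below, the DCache store->load RAW hazard check additionally requires the pending store's byte mask to overlap the load's mask, and the default FPU FMA latencies each increase by one cycle (sfmaLatency 2 -> 3, dfmaLatency 3 -> 4).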
This commit is contained in:
Andrew Waterman 2016-09-09 15:25:15 -07:00 committed by GitHub
commit e6889ea711
5 changed files with 13 additions and 8 deletions

View File

@ -15,6 +15,7 @@
extern dtm_t* dtm; extern dtm_t* dtm;
static uint64_t trace_count = 0; static uint64_t trace_count = 0;
bool verbose; bool verbose;
bool done_reset;
void handle_sigterm(int sig) void handle_sigterm(int sig)
{ {
@ -89,6 +90,7 @@ int main(int argc, char** argv)
tile->eval(); tile->eval();
tile->reset = 0; tile->reset = 0;
} }
done_reset = true;
while (!dtm->done() && !tile->io_success && trace_count < max_cycles) { while (!dtm->done() && !tile->io_success && trace_count < max_cycles) {
tile->clk = 0; tile->clk = 0;

View File

@ -6,6 +6,7 @@
#include <stdio.h> #include <stdio.h>
extern bool verbose; extern bool verbose;
extern bool done_reset;
class VerilatedVcdFILE : public VerilatedVcdFile { class VerilatedVcdFILE : public VerilatedVcdFile {
public: public:

View File

@ -49,7 +49,9 @@ verilator/verilator-$(VERILATOR_VERSION).tar.gz:
# Run Verilator to produce a fast binary to emulate this circuit. # Run Verilator to produce a fast binary to emulate this circuit.
VERILATOR := $(INSTALLED_VERILATOR) --cc --exe VERILATOR := $(INSTALLED_VERILATOR) --cc --exe
VERILATOR_FLAGS := --top-module $(MODEL) +define+PRINTF_COND=\$$c\(\"verbose\"\) --assert \ VERILATOR_FLAGS := --top-module $(MODEL) \
+define+PRINTF_COND=\$$c\(\"verbose\",\"\&\&\"\,\"done_reset\"\) \
+define+STOP_COND=\$$c\(\"done_reset\"\) --assert \
-Wno-STMTDLY --x-assign unique \ -Wno-STMTDLY --x-assign unique \
-I$(base_dir)/vsrc \ -I$(base_dir)/vsrc \
-O3 -CFLAGS "$(CXXFLAGS) -DVERILATOR -include $(base_dir)/csrc/verilator.h" -O3 -CFLAGS "$(CXXFLAGS) -DVERILATOR -include $(base_dir)/csrc/verilator.h"

View File

@ -173,9 +173,9 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true } when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true }
// exceptions // exceptions
val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes)
io.cpu.xcpt.ma.ld := s1_read && misaligned io.cpu.xcpt.ma.ld := s1_read && s1_storegen.misaligned
io.cpu.xcpt.ma.st := s1_write && misaligned io.cpu.xcpt.ma.st := s1_write && s1_storegen.misaligned
io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld
io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st
@ -232,8 +232,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
// store->load RAW hazard detection // store->load RAW hazard detection
val s1_idx = s1_req.addr(idxMSB, wordOffBits) val s1_idx = s1_req.addr(idxMSB, wordOffBits)
val s1_raw_hazard = s1_read && val s1_raw_hazard = s1_read &&
((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx) || ((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_storegen.mask & s1_storegen.mask).orR) ||
(pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx)) (pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx && (pstore2_storegen_mask & s1_storegen.mask).orR))
when (s1_valid && s1_raw_hazard) { s1_nack := true } when (s1_valid && s1_raw_hazard) { s1_nack := true }
metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty) metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty)

View File

@ -12,8 +12,8 @@ import cde.{Parameters, Field}
case class FPUConfig( case class FPUConfig(
divSqrt: Boolean = true, divSqrt: Boolean = true,
sfmaLatency: Int = 2, sfmaLatency: Int = 3,
dfmaLatency: Int = 3 dfmaLatency: Int = 4
) )
object FPConstants object FPConstants