Merge pull request #269 from ucb-bar/tweaks
Two tweaks to Rocket performance and QoR (quality of results)
This commit is contained in:
commit
e6889ea711
@ -15,6 +15,7 @@
|
||||
extern dtm_t* dtm;
|
||||
static uint64_t trace_count = 0;
|
||||
bool verbose;
|
||||
bool done_reset;
|
||||
|
||||
void handle_sigterm(int sig)
|
||||
{
|
||||
@ -89,6 +90,7 @@ int main(int argc, char** argv)
|
||||
tile->eval();
|
||||
tile->reset = 0;
|
||||
}
|
||||
done_reset = true;
|
||||
|
||||
while (!dtm->done() && !tile->io_success && trace_count < max_cycles) {
|
||||
tile->clk = 0;
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
extern bool verbose;
|
||||
extern bool done_reset;
|
||||
|
||||
class VerilatedVcdFILE : public VerilatedVcdFile {
|
||||
public:
|
||||
|
@ -49,7 +49,9 @@ verilator/verilator-$(VERILATOR_VERSION).tar.gz:
|
||||
|
||||
# Run Verilator to produce a fast binary to emulate this circuit.
|
||||
VERILATOR := $(INSTALLED_VERILATOR) --cc --exe
|
||||
VERILATOR_FLAGS := --top-module $(MODEL) +define+PRINTF_COND=\$$c\(\"verbose\"\) --assert \
|
||||
VERILATOR_FLAGS := --top-module $(MODEL) \
|
||||
+define+PRINTF_COND=\$$c\(\"verbose\",\"\&\&\"\,\"done_reset\"\) \
|
||||
+define+STOP_COND=\$$c\(\"done_reset\"\) --assert \
|
||||
-Wno-STMTDLY --x-assign unique \
|
||||
-I$(base_dir)/vsrc \
|
||||
-O3 -CFLAGS "$(CXXFLAGS) -DVERILATOR -include $(base_dir)/csrc/verilator.h"
|
||||
|
@ -173,9 +173,9 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true }
|
||||
|
||||
// exceptions
|
||||
val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned
|
||||
io.cpu.xcpt.ma.ld := s1_read && misaligned
|
||||
io.cpu.xcpt.ma.st := s1_write && misaligned
|
||||
val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes)
|
||||
io.cpu.xcpt.ma.ld := s1_read && s1_storegen.misaligned
|
||||
io.cpu.xcpt.ma.st := s1_write && s1_storegen.misaligned
|
||||
io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld
|
||||
io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st
|
||||
|
||||
@ -232,8 +232,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
||||
// store->load RAW hazard detection
|
||||
val s1_idx = s1_req.addr(idxMSB, wordOffBits)
|
||||
val s1_raw_hazard = s1_read &&
|
||||
((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx) ||
|
||||
(pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx))
|
||||
((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_storegen.mask & s1_storegen.mask).orR) ||
|
||||
(pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx && (pstore2_storegen_mask & s1_storegen.mask).orR))
|
||||
when (s1_valid && s1_raw_hazard) { s1_nack := true }
|
||||
|
||||
metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty)
|
||||
|
@ -12,8 +12,8 @@ import cde.{Parameters, Field}
|
||||
|
||||
case class FPUConfig(
|
||||
divSqrt: Boolean = true,
|
||||
sfmaLatency: Int = 2,
|
||||
dfmaLatency: Int = 3
|
||||
sfmaLatency: Int = 3,
|
||||
dfmaLatency: Int = 4
|
||||
)
|
||||
|
||||
object FPConstants
|
||||
|
Loading…
x
Reference in New Issue
Block a user