Merge pull request #269 from ucb-bar/tweaks
Two tweaks to Rocket perf & QoR
This commit is contained in:
commit
e6889ea711
@ -15,6 +15,7 @@
|
|||||||
extern dtm_t* dtm;
|
extern dtm_t* dtm;
|
||||||
static uint64_t trace_count = 0;
|
static uint64_t trace_count = 0;
|
||||||
bool verbose;
|
bool verbose;
|
||||||
|
bool done_reset;
|
||||||
|
|
||||||
void handle_sigterm(int sig)
|
void handle_sigterm(int sig)
|
||||||
{
|
{
|
||||||
@ -89,6 +90,7 @@ int main(int argc, char** argv)
|
|||||||
tile->eval();
|
tile->eval();
|
||||||
tile->reset = 0;
|
tile->reset = 0;
|
||||||
}
|
}
|
||||||
|
done_reset = true;
|
||||||
|
|
||||||
while (!dtm->done() && !tile->io_success && trace_count < max_cycles) {
|
while (!dtm->done() && !tile->io_success && trace_count < max_cycles) {
|
||||||
tile->clk = 0;
|
tile->clk = 0;
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
extern bool verbose;
|
extern bool verbose;
|
||||||
|
extern bool done_reset;
|
||||||
|
|
||||||
class VerilatedVcdFILE : public VerilatedVcdFile {
|
class VerilatedVcdFILE : public VerilatedVcdFile {
|
||||||
public:
|
public:
|
||||||
|
@ -49,7 +49,9 @@ verilator/verilator-$(VERILATOR_VERSION).tar.gz:
|
|||||||
|
|
||||||
# Run Verilator to produce a fast binary to emulate this circuit.
|
# Run Verilator to produce a fast binary to emulate this circuit.
|
||||||
VERILATOR := $(INSTALLED_VERILATOR) --cc --exe
|
VERILATOR := $(INSTALLED_VERILATOR) --cc --exe
|
||||||
VERILATOR_FLAGS := --top-module $(MODEL) +define+PRINTF_COND=\$$c\(\"verbose\"\) --assert \
|
VERILATOR_FLAGS := --top-module $(MODEL) \
|
||||||
|
+define+PRINTF_COND=\$$c\(\"verbose\",\"\&\&\"\,\"done_reset\"\) \
|
||||||
|
+define+STOP_COND=\$$c\(\"done_reset\"\) --assert \
|
||||||
-Wno-STMTDLY --x-assign unique \
|
-Wno-STMTDLY --x-assign unique \
|
||||||
-I$(base_dir)/vsrc \
|
-I$(base_dir)/vsrc \
|
||||||
-O3 -CFLAGS "$(CXXFLAGS) -DVERILATOR -include $(base_dir)/csrc/verilator.h"
|
-O3 -CFLAGS "$(CXXFLAGS) -DVERILATOR -include $(base_dir)/csrc/verilator.h"
|
||||||
|
@ -173,9 +173,9 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true }
|
when (s2_valid && (!s2_valid_hit || s2_update_meta)) { s1_nack := true }
|
||||||
|
|
||||||
// exceptions
|
// exceptions
|
||||||
val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned
|
val s1_storegen = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes)
|
||||||
io.cpu.xcpt.ma.ld := s1_read && misaligned
|
io.cpu.xcpt.ma.ld := s1_read && s1_storegen.misaligned
|
||||||
io.cpu.xcpt.ma.st := s1_write && misaligned
|
io.cpu.xcpt.ma.st := s1_write && s1_storegen.misaligned
|
||||||
io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld
|
io.cpu.xcpt.pf.ld := s1_read && tlb.io.resp.xcpt_ld
|
||||||
io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st
|
io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st
|
||||||
|
|
||||||
@ -232,8 +232,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
|
|||||||
// store->load RAW hazard detection
|
// store->load RAW hazard detection
|
||||||
val s1_idx = s1_req.addr(idxMSB, wordOffBits)
|
val s1_idx = s1_req.addr(idxMSB, wordOffBits)
|
||||||
val s1_raw_hazard = s1_read &&
|
val s1_raw_hazard = s1_read &&
|
||||||
((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx) ||
|
((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx && (pstore1_storegen.mask & s1_storegen.mask).orR) ||
|
||||||
(pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx))
|
(pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx && (pstore2_storegen_mask & s1_storegen.mask).orR))
|
||||||
when (s1_valid && s1_raw_hazard) { s1_nack := true }
|
when (s1_valid && s1_raw_hazard) { s1_nack := true }
|
||||||
|
|
||||||
metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty)
|
metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_update_meta) || (s2_victimize && !s2_victim_dirty)
|
||||||
|
@ -12,8 +12,8 @@ import cde.{Parameters, Field}
|
|||||||
|
|
||||||
case class FPUConfig(
|
case class FPUConfig(
|
||||||
divSqrt: Boolean = true,
|
divSqrt: Boolean = true,
|
||||||
sfmaLatency: Int = 2,
|
sfmaLatency: Int = 3,
|
||||||
dfmaLatency: Int = 3
|
dfmaLatency: Int = 4
|
||||||
)
|
)
|
||||||
|
|
||||||
object FPConstants
|
object FPConstants
|
||||||
|
Loading…
Reference in New Issue
Block a user