Support cache->cpu nacks one cycle after request

Andrew Waterman 2011-12-10 00:42:09 -08:00
parent c01e1f1cef
commit ce201559f3
7 changed files with 60 additions and 45 deletions

View File

@@ -186,7 +186,8 @@ object Constants
// rocketNBDCacheDM parameters
val CPU_DATA_BITS = 64;
val CPU_TAG_BITS = 5;
val CPU_TAG_BITS = 11;
val DCACHE_TAG_BITS = 1 + CPU_TAG_BITS;
val OFFSET_BITS = 6; // log2(cache line size in bytes)
val NMSHR = 2; // number of primary misses
val NRPQ = 16; // number of secondary misses
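
Note on the width change above: the datapath now packs the memory access type and the low-order address bits into the request tag alongside the 5-bit writeback register address (see the dpath hunk further down), so the CPU-side tag grows from 5 to 11 bits, and the cache-side tag adds one more bit to distinguish PTW from CPU requests. A sketch of that arithmetic in plain Scala, using only widths that appear in this diff:

val memTypeBits = 3                                       // ex_mem_type
val addrLowBits = 3                                       // req_addr(2,0), offset within the 64-bit word
val waddrBits   = 5                                       // writeback register address (the old CPU_TAG_BITS)
val cpuTagBits  = memTypeBits + addrLowBits + waddrBits   // = 11, the new CPU_TAG_BITS
val dcacheTagBits = 1 + cpuTagBits                        // = 12; the extra bit marks PTW requests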

View File

@@ -95,13 +95,14 @@ class rocketProc extends Component
arb.io.cpu.req_val := ctrl.io.dmem.req_val;
arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd;
arb.io.cpu.req_type := ctrl.io.dmem.req_type;
arb.io.cpu.dtlb_miss := ctrl.io.dpath.killm;
arb.io.cpu.req_nack := ctrl.io.dpath.killm;
arb.io.cpu.req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0);
arb.io.cpu.req_ppn := dtlb.io.cpu.resp_ppn;
arb.io.cpu.req_data := dpath.io.dmem.req_data;
arb.io.cpu.req_tag := dpath.io.dmem.req_tag;
ctrl.io.dmem.req_rdy := dtlb.io.cpu.req_rdy && arb.io.cpu.req_rdy;
ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss;
ctrl.io.dmem.resp_nack := arb.io.cpu.resp_nack;
dpath.io.dmem.resp_val := arb.io.cpu.resp_val;
dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag;
dpath.io.dmem.resp_data := arb.io.cpu.resp_data;
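
The arbiter's former dtlb_miss input is renamed to req_nack and is still driven from the mem-stage kill signal, and the control unit now also observes resp_nack from the arbiter. The contract implied by the commit title is that a request accepted in one cycle may be nacked in the next, and the requester must then reissue it. A behavioral sketch of that contract in plain Scala (a simplified model with hypothetical names, not part of the RTL):

object NackModel {
  case class Req(tag: Int)

  private var accepted: Option[Req] = None           // request the cache took last cycle

  // One call per cycle: 'issue' is the request the CPU wants to send this cycle,
  // 'reqRdy' and 'respNack' are the cache's handshake outputs in the same cycle.
  def cycle(issue: Option[Req], reqRdy: Boolean, respNack: Boolean): Option[Req] = {
    val replay   = if (respNack) accepted else None  // the nack refers to last cycle's request
    val selected = replay.orElse(issue)              // a replayed request takes priority
    accepted     = if (reqRdy) selected else None    // only an accepted request can be nacked later
    selected                                         // what is driven onto the request port
  }
}

In the real pipeline the replay happens by refetching the mem-stage instruction rather than by muxing requests, as the control hunk below shows.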

View File

@@ -37,6 +37,7 @@ class ioCtrlDpath extends Bundle()
val mem_eret = Bool('output);
val mem_load = Bool('output);
val wen = Bool('output);
val ex_mem_type = UFix(3, 'output)
// instruction in execute is an unconditional jump
val ex_jmp = Bool('output);
// enable/disable interrupts
@@ -74,7 +75,7 @@ class ioCtrlAll extends Bundle()
val dpath = new ioCtrlDpath();
val console = new ioConsole(List("rdy"));
val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip();
val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss")).flip();
val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip();
val host = new ioHost(List("start"));
val dtlb_val = Bool('output)
val dtlb_rdy = Bool('input);
@@ -303,8 +304,8 @@ class rocketCtrl extends Component
val id_console_out_val = id_wen_pcr.toBool && (id_raddr2 === PCR_CONSOLE);
val wb_reg_div_mul_val = Reg(){Bool()};
val dcache_miss = Reg(io.dmem.resp_miss);
val wb_reg_div_mul_val = Reg(resetVal = Bool(false))
val dcache_miss = Reg(io.dmem.resp_miss, resetVal = Bool(false));
val sboard = new rocketCtrlSboard();
sboard.io.raddra := id_raddr2.toUFix;
@@ -526,7 +527,7 @@ class rocketCtrl extends Component
io.dpath.badvaddr_wen := io.xcpt_dtlb_ld || io.xcpt_dtlb_st;
// replay mem stage PC on a DTLB miss
val mem_hazard = io.dtlb_miss
val mem_hazard = io.dtlb_miss || io.dmem.resp_nack
val replay_mem = mem_hazard || mem_reg_replay;
val kill_mem = mem_hazard || mem_exception;
@@ -667,6 +668,7 @@ class rocketCtrl extends Component
io.dmem.req_val := ex_reg_mem_val && !kill_dmem;
io.dmem.req_cmd := ex_reg_mem_cmd;
io.dmem.req_type := ex_reg_mem_type;
io.dpath.ex_mem_type:= ex_reg_mem_type
}
}
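
In the pipeline control a data-cache nack is handled exactly like a DTLB miss: the mem-stage instruction is killed and its PC is replayed. A plain-Scala restatement of the hazard logic in the hunk above (names mirror the diff; this only captures the decision, not the RTL):

// replayMem refetches the mem-stage PC; killMem squashes the instruction's side effects.
def memStageHazard(dtlbMiss: Boolean, dmemRespNack: Boolean,
                   memRegReplay: Boolean, memException: Boolean): (Boolean, Boolean) = {
  val memHazard = dtlbMiss || dmemRespNack   // a nack is treated like a DTLB miss
  val replayMem = memHazard || memRegReplay
  val killMem   = memHazard || memException
  (replayMem, killMem)
}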

View File

@@ -7,7 +7,7 @@ import scala.math._;
// interface between D$ and processor/DTLB
class ioDmem(view: List[String] = null) extends Bundle(view) {
val dtlb_miss = Bool('input);
val req_nack = Bool('input);
val req_val = Bool('input);
val req_rdy = Bool('output);
val req_cmd = Bits(4, 'input);
@@ -15,13 +15,14 @@ class ioDmem(view: List[String] = null) extends Bundle(view) {
val req_idx = Bits(PGIDX_BITS, 'input);
val req_ppn = Bits(PPN_BITS, 'input);
val req_data = Bits(64, 'input);
val req_tag = Bits(5, 'input);
val req_tag = Bits(DCACHE_TAG_BITS, 'input);
val xcpt_ma_ld = Bool('output); // misaligned load
val xcpt_ma_st = Bool('output); // misaligned store
val resp_miss = Bool('output);
val resp_nack = Bool('output);
val resp_val = Bool('output);
val resp_data = Bits(64, 'output);
val resp_tag = Bits(12, 'output);
val resp_tag = Bits(DCACHE_TAG_BITS, 'output);
}
// interface between D$ and next level in memory hierarchy
@@ -125,7 +126,7 @@ class rocketDCacheDM_flush(lines: Int) extends Component {
val flush_resp_count = Reg(resetVal = UFix(0, indexbits));
val flushing = Reg(resetVal = Bool(false));
val flush_waiting = Reg(resetVal = Bool(false));
val r_cpu_req_tag = Reg(resetVal = Bits(0, 5));
val r_cpu_req_tag = Reg() { Bits() }
when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_cmd === M_FLA))
{
@@ -144,7 +145,7 @@ class rocketDCacheDM_flush(lines: Int) extends Component {
when (flushing && dcache.io.cpu.req_rdy) {
flush_count <== flush_count + UFix(1,1);
}
when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag(5,0) === r_cpu_req_tag)) {
when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag)) {
flush_resp_count <== flush_resp_count + UFix(1,1);
}
@@ -155,13 +156,14 @@ class rocketDCacheDM_flush(lines: Int) extends Component {
dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag);
dcache.io.cpu.req_type := io.cpu.req_type;
dcache.io.cpu.req_data ^^ io.cpu.req_data;
dcache.io.cpu.dtlb_miss := io.cpu.dtlb_miss && !flush_waiting;
dcache.io.cpu.req_nack := io.cpu.req_nack && !flush_waiting;
dcache.io.mem ^^ io.mem;
io.cpu.xcpt_ma_ld := dcache.io.cpu.xcpt_ma_ld;
io.cpu.xcpt_ma_st := dcache.io.cpu.xcpt_ma_st;
io.cpu.req_rdy := dcache.io.cpu.req_rdy && !flush_waiting;
io.cpu.resp_miss := dcache.io.cpu.resp_miss;
io.cpu.resp_nack := dcache.io.cpu.resp_nack;
io.cpu.resp_data := dcache.io.cpu.resp_data;
io.cpu.resp_tag := dcache.io.cpu.resp_tag;
io.cpu.resp_val := dcache.io.cpu.resp_val &
@@ -192,7 +194,7 @@ class rocketDCacheDM(lines: Int) extends Component {
val r_cpu_req_val = Reg(resetVal = Bool(false));
val r_cpu_req_cmd = Reg(resetVal = Bits(0,4));
val r_cpu_req_type = Reg(resetVal = Bits(0,3));
val r_cpu_req_tag = Reg(resetVal = Bits(0,5));
val r_cpu_req_tag = Reg() { Bits() }
val r_cpu_resp_val = Reg(resetVal = Bool(false));
val r_amo_data = Reg(resetVal = Bits(0,64));
@@ -218,7 +220,7 @@ class rocketDCacheDM(lines: Int) extends Component {
r_cpu_req_tag <== io.cpu.req_tag;
}
when ((state === s_ready) && r_cpu_req_val && !io.cpu.dtlb_miss) {
when ((state === s_ready) && r_cpu_req_val && !io.cpu.req_nack) {
r_cpu_req_ppn <== io.cpu.req_ppn;
}
when (io.cpu.req_rdy) {
@@ -271,7 +273,7 @@ class rocketDCacheDM(lines: Int) extends Component {
// load/store addresses conflict if they are to any part of the same 64 bit word
val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetlsb) === p_store_idx(PGIDX_BITS-1,offsetlsb));
val ldst_conflict = tag_valid && tag_match && (r_req_load || r_req_amo) && p_store_valid && addr_match;
val store_hit = r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store ;
val store_hit = r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store ;
// write the pending store data when the cache is idle, when the next command isn't a load
// or when there's a load to the same address (in which case there's a 2 cycle delay:
@@ -305,7 +307,7 @@ class rocketDCacheDM(lines: Int) extends Component {
// dirty bit array
val db_array = Reg(resetVal = Bits(0, lines));
val tag_dirty = db_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool;
when ((r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store) || resolve_store) {
when ((r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store) || resolve_store) {
db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1));
}
when (state === s_write_amo) {
@@ -381,13 +383,13 @@ class rocketDCacheDM(lines: Int) extends Component {
// signal a load miss when the data isn't present in the cache and when it's in the pending store data register
// (causes the cache to block for 2 cycles and the load or amo instruction is replayed)
val load_miss =
!io.cpu.dtlb_miss &&
!io.cpu.req_nack &&
(state === s_ready) && r_cpu_req_val && (r_req_load || r_req_amo) && (!tag_hit || (p_store_valid && addr_match));
// output signals
// busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush
io.cpu.req_rdy := (state === s_ready) && !io.cpu.dtlb_miss && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo)));
io.cpu.resp_val := !io.cpu.dtlb_miss &&
io.cpu.req_rdy := (state === s_ready) && !io.cpu.req_nack && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo)));
io.cpu.resp_val := !io.cpu.req_nack &&
((state === s_ready) && tag_hit && (r_req_load || r_req_amo) && !(p_store_valid && addr_match)) ||
((state === s_resolve_miss) && r_req_flush) ||
r_cpu_resp_val;
@@ -401,8 +403,8 @@ class rocketDCacheDM(lines: Int) extends Component {
io.cpu.xcpt_ma_st := r_cpu_req_val && (r_req_store || r_req_amo) && misaligned;
io.cpu.resp_miss := load_miss;
// tag MSB distinguishes between loads destined for the PTW and CPU
io.cpu.resp_tag := Cat(r_req_ptw_load, r_cpu_req_type, r_cpu_req_idx(2,0), r_cpu_req_tag);
io.cpu.resp_nack := Bool(false)
io.cpu.resp_tag := r_cpu_req_tag
io.cpu.resp_data := resp_data;
io.mem.req_val := (state === s_req_refill) || (state === s_writeback);
@@ -419,7 +421,7 @@ class rocketDCacheDM(lines: Int) extends Component {
state <== s_ready;
}
is (s_ready) {
when (io.cpu.dtlb_miss) {
when (io.cpu.req_nack) {
state <== s_ready;
}
when (ldst_conflict) {
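
Inside the blocking cache the change is mostly a rename: every qualifier that used dtlb_miss now uses req_nack, the tag register loses its fixed 5-bit width so it can hold the wider tag, and the response tag is echoed verbatim instead of being reassembled from the type, index and PTW bits. The blocking cache itself never generates a nack. The relevant lines gathered in one place (Chisel-1 style, taken from the hunks above; the accept condition on the when is an assumption, since it falls outside the hunk):

val r_cpu_req_tag = Reg() { Bits() }          // width now inferred from io.cpu.req_tag
when (io.cpu.req_val && io.cpu.req_rdy) {     // assumed accept condition, not shown in the hunk
  r_cpu_req_tag <== io.cpu.req_tag;
}
io.cpu.resp_nack := Bool(false)               // the blocking cache never nacks
io.cpu.resp_tag  := r_cpu_req_tag             // echoed unchanged; packing moved to the datapath and arbiter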

View File

@@ -8,10 +8,10 @@ import Instructions._
class ioDpathDmem extends Bundle()
{
val req_addr = UFix(VADDR_BITS, 'output);
val req_tag = UFix(5, 'output);
val req_tag = UFix(CPU_TAG_BITS, 'output);
val req_data = Bits(64, 'output);
val resp_val = Bool('input);
val resp_tag = Bits(12, 'input); // FIXME: MSB is ignored
val resp_tag = Bits(CPU_TAG_BITS, 'input);
val resp_data = Bits(64, 'input);
}
@@ -328,7 +328,7 @@ class rocketDpath extends Component
// other signals (req_val, req_rdy) connect to control module
io.dmem.req_addr := ex_alu_out(VADDR_BITS-1,0);
io.dmem.req_data := ex_reg_rs2;
io.dmem.req_tag := ex_reg_waddr;
io.dmem.req_tag := Cat(io.ctrl.ex_mem_type, io.dmem.req_addr(2,0), ex_reg_waddr).toUFix;
// processor control regfile read
pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_eret;
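
The datapath now builds the request tag as Cat(ex_mem_type, req_addr(2,0), ex_reg_waddr), which is why CPU_TAG_BITS becomes 11: the writeback side can recover the access type, the byte offset within the doubleword, and the destination register from resp_tag alone. An illustrative pack/unpack in plain Scala (helper names are hypothetical; the 3 + 3 + 5 field widths come from the diff):

def packTag(memType: Int, addrLow: Int, waddr: Int): Int =
  ((memType & 0x7) << 8) | ((addrLow & 0x7) << 5) | (waddr & 0x1f)

def unpackTag(tag: Int): (Int, Int, Int) =
  ((tag >> 8) & 0x7, (tag >> 5) & 0x7, tag & 0x1f)    // (memType, addrLow, waddr)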

View File

@@ -440,7 +440,7 @@ class rocketNBDCacheDM_flush(lines: Int) extends Component {
dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag);
dcache.io.cpu.req_type := io.cpu.req_type;
dcache.io.cpu.req_data ^^ io.cpu.req_data;
dcache.io.cpu.dtlb_miss := io.cpu.dtlb_miss;
dcache.io.cpu.req_nack := io.cpu.req_nack;
dcache.io.mem ^^ io.mem;
io.cpu.xcpt_ma_ld := dcache.io.cpu.xcpt_ma_ld;
@@ -580,7 +580,7 @@ class rocketNBDCacheDM(lines: Int) extends Component {
r_cpu_req_tag <== io.cpu.req_tag;
}
when ((state === s_ready) && r_cpu_req_val && !io.cpu.dtlb_miss) {
when ((state === s_ready) && r_cpu_req_val && !io.cpu.req_nack) {
r_cpu_req_ppn <== io.cpu.req_ppn;
}
when (io.cpu.req_rdy) {
@@ -633,7 +633,7 @@ class rocketNBDCacheDM(lines: Int) extends Component {
// load/store addresses conflict if they are to any part of the same 64 bit word
val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetlsb) === p_store_idx(PGIDX_BITS-1,offsetlsb));
val ldst_conflict = tag_valid && tag_match && (r_req_load || r_req_amo) && p_store_valid && addr_match;
val store_hit = r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store ;
val store_hit = r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store ;
// write the pending store data when the cache is idle, when the next command isn't a load
// or when there's a load to the same address (in which case there's a 2 cycle delay:
@@ -667,7 +667,7 @@ class rocketNBDCacheDM(lines: Int) extends Component {
// dirty bit array
val db_array = Reg(resetVal = Bits(0, lines));
val tag_dirty = db_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool;
when ((r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store) || resolve_store) {
when ((r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store) || resolve_store) {
db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1));
}
when (state === s_write_amo) {
@@ -751,13 +751,13 @@ class rocketNBDCacheDM(lines: Int) extends Component {
// signal a load miss when the data isn't present in the cache and when it's in the pending store data register
// (causes the cache to block for 2 cycles and the load or amo instruction is replayed)
val load_miss =
!io.cpu.dtlb_miss &&
!io.cpu.req_nack &&
(state === s_ready) && r_cpu_req_val && (r_req_load || r_req_amo) && (!tag_hit || (p_store_valid && addr_match));
// output signals
// busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush
io.cpu.req_rdy := mshr.io.req_rdy && (state === s_ready) && !io.cpu.dtlb_miss && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo)));
io.cpu.resp_val := !io.cpu.dtlb_miss &&
io.cpu.req_rdy := mshr.io.req_rdy && (state === s_ready) && !io.cpu.req_nack && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo)));
io.cpu.resp_val := !io.cpu.req_nack &&
((state === s_ready) && tag_hit && (r_req_load || r_req_amo) && !(p_store_valid && addr_match)) ||
((state === s_resolve_miss) && r_req_flush) ||
r_cpu_resp_val;
@@ -789,7 +789,7 @@ class rocketNBDCacheDM(lines: Int) extends Component {
state <== s_ready;
}
is (s_ready) {
when (io.cpu.dtlb_miss) {
when (io.cpu.req_nack) {
state <== s_ready;
}
when (ldst_conflict) {

View File

@@ -7,7 +7,7 @@ import scala.math._;
class ioDmemArbiter extends Bundle
{
val ptw = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_idx", "req_ppn", "resp_data", "resp_val"));
val ptw = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_idx", "req_ppn", "resp_data", "resp_val", "resp_nack"));
val cpu = new ioDmem();
val mem = new ioDmem().flip();
}
@@ -24,31 +24,31 @@ class rocketDmemArbiter extends Component
io.mem.req_cmd := Mux(io.ptw.req_val, io.ptw.req_cmd, io.cpu.req_cmd);
io.mem.req_type := Mux(io.ptw.req_val, io.ptw.req_type, io.cpu.req_type);
io.mem.req_idx := Mux(io.ptw.req_val, io.ptw.req_idx, io.cpu.req_idx);
// io.mem.req_ppn := Mux(io.ptw.req_val, io.ptw.req_ppn, io.cpu.req_ppn);
io.mem.req_ppn := Mux(r_ptw_req_val, r_ptw_req_ppn, io.cpu.req_ppn);
io.mem.req_data := io.cpu.req_data;
io.mem.req_tag := Mux(io.ptw.req_val, Bits(0,5), io.cpu.req_tag);
// io.mem.dtlb_busy := io.cpu.dtlb_busy;
io.mem.dtlb_miss := io.cpu.dtlb_miss;
io.mem.req_tag := Cat(io.cpu.req_tag, io.ptw.req_val);
io.mem.req_nack := io.cpu.req_nack;
io.ptw.req_rdy := io.mem.req_rdy;
io.cpu.req_rdy := io.mem.req_rdy && !io.ptw.req_val;
io.cpu.resp_miss := io.mem.resp_miss && !io.mem.resp_tag(11).toBool;
io.cpu.resp_miss := io.mem.resp_miss && !io.mem.resp_tag(0).toBool;
io.cpu.resp_val := io.mem.resp_val && !io.mem.resp_tag(11).toBool;
io.ptw.resp_val := io.mem.resp_val && io.mem.resp_tag(11).toBool;
io.cpu.resp_nack := io.mem.resp_nack && !r_ptw_req_val
io.ptw.resp_nack := io.mem.resp_nack && r_ptw_req_val
io.cpu.resp_val := io.mem.resp_val && !io.mem.resp_tag(0).toBool;
io.ptw.resp_val := io.mem.resp_val && io.mem.resp_tag(0).toBool;
io.ptw.resp_data := io.mem.resp_data;
io.cpu.resp_data := io.mem.resp_data;
// io.cpu.resp_tag := io.mem.resp_tag(10,0);
io.cpu.resp_tag := io.mem.resp_tag;
io.cpu.resp_tag := io.mem.resp_tag >> UFix(1);
}
class ioPTW extends Bundle
{
val itlb = new ioTLB_PTW().flip();
val dtlb = new ioTLB_PTW().flip();
val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_ppn", "req_idx", "resp_data", "resp_val")).flip();
val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_ppn", "req_idx", "resp_data", "resp_val", "resp_nack")).flip();
val ptbr = UFix(PADDR_BITS, 'input);
}
@@ -139,6 +139,9 @@ class rocketPTW extends Component
}
}
is (s_l1_wait) {
when (io.dmem.resp_nack) {
state <== s_l1_req
}
when (io.dmem.resp_val) {
when (resp_ptd) { // page table descriptor
state <== s_l2_req;
@@ -161,6 +164,9 @@ class rocketPTW extends Component
}
}
is (s_l2_wait) {
when (io.dmem.resp_nack) {
state <== s_l2_req
}
when (io.dmem.resp_val) {
when (resp_ptd) { // page table descriptor
state <== s_l3_req;
@@ -183,6 +189,9 @@ class rocketPTW extends Component
}
}
is (s_l3_wait) {
when (io.dmem.resp_nack) {
state <== s_l3_req
}
when (io.dmem.resp_val) {
when (resp_pte) { // page table entry
state <== s_done;
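
The arbiter now carries the PTW marker in the tag LSB (Cat(io.cpu.req_tag, io.ptw.req_val)) and strips it again before returning resp_tag to the CPU. Because a nack arrives one cycle after the request, before any response tag exists, it is steered by r_ptw_req_val, the registered record of which requester owned the port last cycle, rather than by the tag. The PTW then retries a nacked walk step by dropping back into the matching request state, as the three new when blocks above show. A condensed sketch of the steering, mirroring the hunk (Chisel-1 style):

io.mem.req_tag   := Cat(io.cpu.req_tag, io.ptw.req_val)             // LSB marks a PTW request
io.cpu.resp_val  := io.mem.resp_val  && !io.mem.resp_tag(0).toBool
io.ptw.resp_val  := io.mem.resp_val  &&  io.mem.resp_tag(0).toBool
io.cpu.resp_nack := io.mem.resp_nack && !r_ptw_req_val               // the nack follows last cycle's owner
io.ptw.resp_nack := io.mem.resp_nack &&  r_ptw_req_val
io.cpu.resp_tag  := io.mem.resp_tag >> UFix(1)                       // drop the PTW bit again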