caches now use Mem4() memories for tag+data arrays
This commit is contained in:
parent
c580180b66
commit
e894b79870
@ -249,20 +249,11 @@ class rocketDCacheDM(lines: Int) extends Component {
|
|||||||
((state === s_refill) && io.mem.resp_val && (rr_count === UFix(3,2))) ||
|
((state === s_refill) && io.mem.resp_val && (rr_count === UFix(3,2))) ||
|
||||||
((state === s_resolve_miss) && r_req_flush);
|
((state === s_resolve_miss) && r_req_flush);
|
||||||
|
|
||||||
// val tag_array = new rocketSRAMsp(lines, tagbits);
|
val tag_array = Mem4(lines, r_cpu_req_ppn);
|
||||||
val tag_array = new TS1N65LPA128X27M4;
|
tag_array.setReadLatency(0);
|
||||||
val tag_array_ceb = Mux(reset, Bool(true), !(
|
val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we);
|
||||||
(io.cpu.req_val && io.cpu.req_rdy) ||
|
// tag_array.write(tag_addr, r_cpu_req_ppn, tag_we);
|
||||||
(state === s_start_writeback) ||
|
// val tag_rdata = tag_array(tag_addr);
|
||||||
(state === s_writeback)));
|
|
||||||
val tag_array_web = Mux(reset, Bool(true), !tag_we);
|
|
||||||
tag_array.io.A := tag_addr;
|
|
||||||
tag_array.io.D := r_cpu_req_ppn;
|
|
||||||
tag_array.io.CEB := tag_array_ceb && tag_array_web;
|
|
||||||
tag_array.io.WEB := tag_array_web;
|
|
||||||
tag_array.io.BWEB := Bits(0,tagbits);
|
|
||||||
val tag_rdata = tag_array.io.Q;
|
|
||||||
tag_array.io.TSEL := Bits(1,2);
|
|
||||||
|
|
||||||
// valid bit array
|
// valid bit array
|
||||||
val vb_array = Reg(resetVal = Bits(0, lines));
|
val vb_array = Reg(resetVal = Bits(0, lines));
|
||||||
@ -334,14 +325,9 @@ class rocketDCacheDM(lines: Int) extends Component {
|
|||||||
val store_wmask_d = storegen.io.store_wmask;
|
val store_wmask_d = storegen.io.store_wmask;
|
||||||
val store_wmask = Mux(p_store_idx(offsetlsb).toBool, Cat(store_wmask_d, Bits(0,64)), Cat(Bits(0,64), store_wmask_d));
|
val store_wmask = Mux(p_store_idx(offsetlsb).toBool, Cat(store_wmask_d, Bits(0,64)), Cat(Bits(0,64), store_wmask_d));
|
||||||
|
|
||||||
// data array
|
|
||||||
// val data_array = new rocketSRAMsp(lines*4, 128);
|
|
||||||
val data_array = new TS1N65LPA512X128M4;
|
|
||||||
val data_array_rdata = data_array.io.Q;
|
|
||||||
val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0));
|
|
||||||
val r_resp_data = Reg(resp_data);
|
|
||||||
|
|
||||||
// ALU for AMOs
|
// ALU for AMOs
|
||||||
|
val amo_alu = new rocketDCacheAmoALU();
|
||||||
|
val amo_alu_out = Cat(amo_alu.io.result,amo_alu.io.result);
|
||||||
val amo_wmask =
|
val amo_wmask =
|
||||||
Mux(r_cpu_req_type === MT_D, ~Bits(0,8),
|
Mux(r_cpu_req_type === MT_D, ~Bits(0,8),
|
||||||
Mux(r_cpu_req_idx(2).toBool, Cat(~Bits(0,4), Bits(0,4)),
|
Mux(r_cpu_req_idx(2).toBool, Cat(~Bits(0,4), Bits(0,4)),
|
||||||
@ -358,47 +344,42 @@ class rocketDCacheDM(lines: Int) extends Component {
|
|||||||
|
|
||||||
val amo_store_wmask = Mux(r_cpu_req_idx(offsetlsb).toBool, Cat(amo_store_wmask_d, Bits(0,64)), Cat(Bits(0,64), amo_store_wmask_d));
|
val amo_store_wmask = Mux(r_cpu_req_idx(offsetlsb).toBool, Cat(amo_store_wmask_d, Bits(0,64)), Cat(Bits(0,64), amo_store_wmask_d));
|
||||||
|
|
||||||
val amo_alu = new rocketDCacheAmoALU();
|
// data array
|
||||||
amo_alu.io.cmd := r_cpu_req_cmd;
|
val data_addr =
|
||||||
amo_alu.io.wmask := amo_wmask;
|
|
||||||
amo_alu.io.lhs := Mux(r_cpu_resp_val, resp_data, r_resp_data).toUFix;
|
|
||||||
amo_alu.io.rhs := r_amo_data.toUFix;
|
|
||||||
val amo_alu_out = Cat(amo_alu.io.result,amo_alu.io.result);
|
|
||||||
|
|
||||||
data_array.io.A :=
|
|
||||||
Mux(drain_store || resolve_store, p_store_idx(PGIDX_BITS-1, offsetmsb-1),
|
Mux(drain_store || resolve_store, p_store_idx(PGIDX_BITS-1, offsetmsb-1),
|
||||||
Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count_next),
|
Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count_next),
|
||||||
Mux((state === s_start_writeback) || (state === s_writeback) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count),
|
Mux((state === s_start_writeback) || (state === s_writeback) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count),
|
||||||
Mux((state === s_resolve_miss) || (state === s_replay_load) || (state === s_write_amo), r_cpu_req_idx(PGIDX_BITS-1, offsetmsb-1),
|
Mux((state === s_resolve_miss) || (state === s_replay_load) || (state === s_write_amo), r_cpu_req_idx(PGIDX_BITS-1, offsetmsb-1),
|
||||||
io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1))))).toUFix;
|
io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1))))).toUFix;
|
||||||
|
|
||||||
data_array.io.D :=
|
val data_wdata =
|
||||||
Mux((state === s_refill), io.mem.resp_data,
|
Mux((state === s_refill), io.mem.resp_data,
|
||||||
Mux((state === s_write_amo), amo_alu_out,
|
Mux((state === s_write_amo), amo_alu_out,
|
||||||
store_data));
|
store_data));
|
||||||
|
|
||||||
val data_array_web = Mux(reset, Bool(true), !(
|
val data_we =
|
||||||
((state === s_refill) && io.mem.resp_val) ||
|
((state === s_refill) && io.mem.resp_val) ||
|
||||||
(state === s_write_amo) ||
|
(state === s_write_amo) ||
|
||||||
drain_store || resolve_store));
|
drain_store || resolve_store;
|
||||||
|
|
||||||
data_array.io.WEB := data_array_web;
|
val data_wmask =
|
||||||
|
|
||||||
data_array.io.BWEB := ~(
|
|
||||||
Mux((state === s_refill), ~Bits(0,128),
|
Mux((state === s_refill), ~Bits(0,128),
|
||||||
Mux((state === s_write_amo), amo_store_wmask,
|
Mux((state === s_write_amo), amo_store_wmask,
|
||||||
store_wmask)));
|
store_wmask));
|
||||||
|
|
||||||
val data_array_ceb = Mux(reset, Bool(true), !(
|
val data_array = Mem4(lines*4, data_wdata);
|
||||||
(io.cpu.req_val && io.cpu.req_rdy && (req_load || req_amo)) ||
|
data_array.setReadLatency(0);
|
||||||
(state === s_start_writeback) ||
|
val data_array_rdata = data_array.rw(data_addr, data_wdata, data_we, data_wmask);
|
||||||
(state === s_writeback) ||
|
// data_array.write(data_addr, data_wdata, data_we, data_wmask);
|
||||||
((state === s_resolve_miss) && (r_req_load || r_req_amo)) ||
|
// val data_array_rdata = data_array(data_addr);
|
||||||
(state === s_replay_load)));
|
val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0));
|
||||||
|
val r_resp_data = Reg(resp_data);
|
||||||
|
|
||||||
data_array.io.CEB := data_array_ceb && data_array_web;
|
amo_alu.io.cmd := r_cpu_req_cmd;
|
||||||
|
amo_alu.io.wmask := amo_wmask;
|
||||||
|
amo_alu.io.lhs := Mux(r_cpu_resp_val, resp_data, r_resp_data).toUFix;
|
||||||
|
amo_alu.io.rhs := r_amo_data.toUFix;
|
||||||
|
|
||||||
data_array.io.TSEL := Bits(1,2);
|
|
||||||
// signal a load miss when the data isn't present in the cache and when it's in the pending store data register
|
// signal a load miss when the data isn't present in the cache and when it's in the pending store data register
|
||||||
// (causes the cache to block for 2 cycles and the load or amo instruction is replayed)
|
// (causes the cache to block for 2 cycles and the load or amo instruction is replayed)
|
||||||
val load_miss =
|
val load_miss =
|
||||||
|
@ -34,49 +34,6 @@ class ioICacheDM extends Bundle()
|
|||||||
val mem = new ioIcache().flip();
|
val mem = new ioIcache().flip();
|
||||||
}
|
}
|
||||||
|
|
||||||
// single port SRAM i/o
|
|
||||||
class ioSRAMsp (width: Int, addrbits: Int) extends Bundle {
|
|
||||||
val A = UFix(addrbits, 'input); // address
|
|
||||||
val D = Bits(width, 'input); // data input
|
|
||||||
val BWEB = Bits(width, 'input); // bit write enable mask
|
|
||||||
val CEB = Bool('input); // chip enable
|
|
||||||
val WEB = Bool('input); // write enable
|
|
||||||
val Q = Bits(width, 'output); // data out
|
|
||||||
val TSEL = Bits(2, 'input);
|
|
||||||
}
|
|
||||||
|
|
||||||
// single ported SRAM
|
|
||||||
class TS1N65LPA128X27M4 extends Component {
|
|
||||||
val addrbits = 7;
|
|
||||||
val width = 27;
|
|
||||||
val entries = 128;
|
|
||||||
val io = new ioSRAMsp(width, addrbits);
|
|
||||||
val wmask = ~io.BWEB;
|
|
||||||
val sram = Mem(entries, !io.WEB, io.A, io.D, wrMask = wmask, resetVal = null);
|
|
||||||
val rdata = Reg(Mux(!io.CEB, sram.read(io.A), Bits(0,width)));
|
|
||||||
io.Q := rdata;
|
|
||||||
}
|
|
||||||
|
|
||||||
class TS1N65LPA512X128M4 extends Component {
|
|
||||||
val addrbits = 9;
|
|
||||||
val width = 128;
|
|
||||||
val entries = 512;
|
|
||||||
val io = new ioSRAMsp(width, addrbits);
|
|
||||||
val wmask = ~io.BWEB;
|
|
||||||
val sram = Mem(entries, !io.WEB, io.A, io.D, wrMask = wmask, resetVal = null);
|
|
||||||
val rdata = Reg(Mux(!io.CEB, sram.read(io.A), Bits(0,width)));
|
|
||||||
io.Q := rdata;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
class rocketSRAMsp(entries: Int, width: Int) extends Component {
|
|
||||||
val addrbits = ceil(log10(entries)/log10(2)).toInt;
|
|
||||||
val io = new ioSRAMsp(width, addrbits);
|
|
||||||
val sram = Mem(entries, io.we, io.a, io.d, wrMask = io.bweb, resetVal = null);
|
|
||||||
val rdata = Reg(Mux(io.ce, sram.read(io.a), Bits(0,width)));
|
|
||||||
io.q := rdata;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
// basic direct mapped instruction cache
|
// basic direct mapped instruction cache
|
||||||
// 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines
|
// 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines
|
||||||
// parameters :
|
// parameters :
|
||||||
@ -121,23 +78,17 @@ class rocketICacheDM(lines: Int) extends Component {
|
|||||||
when (io.mem.resp_val) {
|
when (io.mem.resp_val) {
|
||||||
refill_count <== refill_count + UFix(1);
|
refill_count <== refill_count + UFix(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// tag array
|
|
||||||
// val tag_array = new rocketSRAMsp(lines, tagbits);
|
|
||||||
val tag_array = new TS1N65LPA128X27M4;
|
|
||||||
val tag_addr =
|
val tag_addr =
|
||||||
Mux((state === s_refill_wait), r_cpu_req_idx(PGIDX_BITS-1,offsetbits),
|
Mux((state === s_refill_wait), r_cpu_req_idx(PGIDX_BITS-1,offsetbits),
|
||||||
io.cpu.req_idx(PGIDX_BITS-1,offsetbits)).toUFix;
|
io.cpu.req_idx(PGIDX_BITS-1,offsetbits)).toUFix;
|
||||||
val tag_we = (state === s_refill_wait) && io.mem.resp_val;
|
val tag_we = (state === s_refill_wait) && io.mem.resp_val;
|
||||||
val tag_array_ceb = Mux(reset, Bool(true), !(io.cpu.req_val && io.cpu.req_rdy));
|
|
||||||
val tag_array_web = Mux(reset, Bool(true), !tag_we);
|
val tag_array = Mem4(lines, r_cpu_req_ppn);
|
||||||
tag_array.io.A := tag_addr;
|
tag_array.setReadLatency(0);
|
||||||
tag_array.io.D := r_cpu_req_ppn;
|
val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we);
|
||||||
tag_array.io.CEB := tag_array_ceb && tag_array_web;
|
|
||||||
tag_array.io.WEB := tag_array_web;
|
// tag_array.write(tag_addr, r_cpu_req_ppn, tag_we);
|
||||||
tag_array.io.TSEL := Bits(1,2);
|
// val tag_rdata = tag_array.read(tag_addr);
|
||||||
tag_array.io.BWEB := Bits(0,tagbits);
|
|
||||||
val tag_rdata = tag_array.io.Q;
|
|
||||||
|
|
||||||
// valid bit array
|
// valid bit array
|
||||||
val vb_array = Reg(resetVal = Bits(0, lines));
|
val vb_array = Reg(resetVal = Bits(0, lines));
|
||||||
@ -152,20 +103,14 @@ class rocketICacheDM(lines: Int) extends Component {
|
|||||||
val tag_match = (tag_rdata === io.cpu.req_ppn);
|
val tag_match = (tag_rdata === io.cpu.req_ppn);
|
||||||
|
|
||||||
// data array
|
// data array
|
||||||
// val data_array = new rocketSRAMsp(lines*4, 128);
|
val data_addr =
|
||||||
val data_array = new TS1N65LPA512X128M4;
|
|
||||||
val data_array_ceb = Mux(reset, Bool(true), !((io.cpu.req_rdy && io.cpu.req_val) || (state === s_resolve_miss)));
|
|
||||||
val data_array_web = Mux(reset, Bool(true), ~io.mem.resp_val);
|
|
||||||
data_array.io.A :=
|
|
||||||
Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), refill_count),
|
Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), refill_count),
|
||||||
io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix;
|
io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix;
|
||||||
data_array.io.D := io.mem.resp_data;
|
val data_array = Mem4(lines*4, io.mem.resp_data);
|
||||||
data_array.io.CEB := data_array_ceb && data_array_web;
|
data_array.setReadLatency(0);
|
||||||
data_array.io.WEB := data_array_web;
|
val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val);
|
||||||
data_array.io.BWEB := Bits(0,128);
|
// data_array.write(data_addr, io.mem.resp_data, io.mem.resp_val);
|
||||||
data_array.io.TSEL := Bits(1,2);
|
// val data_array_rdata = data_array.read(data_addr);
|
||||||
|
|
||||||
val data_array_rdata = data_array.io.Q;
|
|
||||||
|
|
||||||
// output signals
|
// output signals
|
||||||
io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_valid && tag_match;
|
io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_valid && tag_match;
|
||||||
|
@ -38,7 +38,8 @@ object top_main {
|
|||||||
def main(args: Array[String]) = {
|
def main(args: Array[String]) = {
|
||||||
// Can turn off --debug and --vcd when done with debugging to improve emulator performance
|
// Can turn off --debug and --vcd when done with debugging to improve emulator performance
|
||||||
// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd");
|
// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd");
|
||||||
val cpu_args = args ++ Array("--target-dir", "generated-src","--debug");
|
// val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug");
|
||||||
|
val cpu_args = args ++ Array("--target-dir", "generated-src");
|
||||||
// Set variables based off of command flags
|
// Set variables based off of command flags
|
||||||
// for(a <- args) {
|
// for(a <- args) {
|
||||||
// a match {
|
// a match {
|
||||||
|
Loading…
Reference in New Issue
Block a user