
Refactored cpu/cache interface to use nested bundles

Henry Cook committed 2012-05-01 18:23:04 -07:00
parent 65ff397122
commit 622a801bb1
5 changed files with 198 additions and 150 deletions
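In outline, this commit replaces the flat ioDmem port (one Bundle of req_*/resp_*/xcpt_* wires, pruned per client through string-keyed views) with a single nested ioHellaCache interface built from decoupled-request, pipelined-response, and exception sub-bundles, all defined later in this diff. A minimal sketch of the new shape, assuming the 2012-era Chisel ioDecoupled/ioPipe wrappers and the widths from Constants:

class ioHellaCache extends Bundle {
  val req  = (new ioDecoupled){ new HellaCacheReq }  // CPU -> cache, ready/valid handshake
  val resp = (new ioPipe){ new HellaCacheResp }.flip // cache -> CPU, valid only, no backpressure
  val xcpt = (new HellaCacheExceptions).asInput      // misaligned load/store faults
}

Whole sub-bundles can then be attached with a single bulk connect, as the PTW hookup below does with arb.io.requestor(DMEM_PTW) <> ptw.io.mem, instead of eight separate req_*/resp_* assignments per client.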

View File

@@ -7,15 +7,15 @@ import hwacha._
 class ioRocket extends Bundle()
 {
-  val host = new ioHTIF();
-  val imem = new ioImem().flip
-  val vimem = new ioImem().flip
-  val dmem = new ioDmem().flip
+  val host = new ioHTIF
+  val imem = (new ioImem).flip
+  val vimem = (new ioImem).flip
+  val dmem = new ioHellaCache
 }
 class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
 {
-  val io = new ioRocket();
+  val io = new ioRocket
   val ctrl = new rocketCtrl();
   val dpath = new rocketDpath();
@@ -24,7 +24,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   val itlb = new rocketITLB(ITLB_ENTRIES);
   val vitlb = new rocketITLB(VITLB_ENTRIES)
   val ptw = new rocketPTW();
-  val arb = new rocketDmemArbiter(DCACHE_PORTS)
+  val arb = new rocketHellaCacheArbiter(DCACHE_PORTS)
   var vu: vu = null
   if (HAVE_VEC)
@@ -59,7 +59,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   // connect DTLB to ctrl+dpath
   dtlbarb.io.in(DTLB_CPU).valid := ctrl.io.dtlb_val
   dtlbarb.io.in(DTLB_CPU).bits.kill := ctrl.io.dtlb_kill
-  dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req_cmd
+  dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req.bits.cmd
   dtlbarb.io.in(DTLB_CPU).bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR
   dtlbarb.io.in(DTLB_CPU).bits.vpn := dpath.io.dtlb.vpn
   ctrl.io.dtlb_rdy := dtlbarb.io.in(DTLB_CPU).ready
@@ -75,7 +75,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   // connect DTLB to ctrl+dpath
   dtlb.io.cpu_req.valid := ctrl.io.dtlb_val
   dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill
-  dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req_cmd
+  dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd
   dtlb.io.cpu_req.bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR
   dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn
   ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld
@@ -87,8 +87,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   dtlb.io.invalidate := dpath.io.ptbr_wen
   dtlb.io.status := dpath.io.ctrl.status
-  arb.io.requestor(DMEM_CPU).req_ppn := dtlb.io.cpu_resp.ppn
-  ctrl.io.dmem.req_rdy := dtlb.io.cpu_req.ready && arb.io.requestor(DMEM_CPU).req_rdy
+  arb.io.requestor(DMEM_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn
+  ctrl.io.dmem.req.ready := dtlb.io.cpu_req.ready && arb.io.requestor(DMEM_CPU).req.ready
   // connect page table walker to TLBs, page table base register (from PCR)
   // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority)
@@ -96,8 +96,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   ptw.io.itlb <> itlb.io.ptw;
   ptw.io.vitlb <> vitlb.io.ptw
   ptw.io.ptbr := dpath.io.ptbr;
-  arb.io.requestor(DMEM_PTW) <> ptw.io.dmem
-  arb.io.dmem <> io.dmem
+  arb.io.requestor(DMEM_PTW) <> ptw.io.mem
+  arb.io.mem <> io.dmem
   ctrl.io.dpath <> dpath.io.ctrl;
   dpath.io.host <> io.host;
@@ -120,8 +120,18 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   io.imem.itlb_miss := itlb.io.cpu.resp_miss;
   // connect arbiter to ctrl+dpath+DTLB
-  arb.io.requestor(DMEM_CPU) <> ctrl.io.dmem
-  arb.io.requestor(DMEM_CPU) <> dpath.io.dmem
+  arb.io.requestor(DMEM_CPU).resp <> ctrl.io.dmem.resp
+  arb.io.requestor(DMEM_CPU).xcpt <> ctrl.io.dmem.xcpt
+  arb.io.requestor(DMEM_CPU).resp <> dpath.io.dmem.resp
+  //TODO: views on nested bundles?
+  arb.io.requestor(DMEM_CPU).req.valid := ctrl.io.dmem.req.valid
+  ctrl.io.dmem.req.ready := arb.io.requestor(DMEM_CPU).req.ready
+  arb.io.requestor(DMEM_CPU).req.bits.kill := ctrl.io.dmem.req.bits.kill
+  arb.io.requestor(DMEM_CPU).req.bits.cmd := ctrl.io.dmem.req.bits.cmd
+  arb.io.requestor(DMEM_CPU).req.bits.typ := ctrl.io.dmem.req.bits.typ
+  arb.io.requestor(DMEM_CPU).req.bits.idx := dpath.io.dmem.req.bits.idx
+  arb.io.requestor(DMEM_CPU).req.bits.tag := dpath.io.dmem.req.bits.tag
+  arb.io.requestor(DMEM_CPU).req.bits.data := dpath.io.dmem.req.bits.data
   var fpu: rocketFPU = null
   if (HAVE_FPU)
@@ -207,21 +217,21 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
     storegen.io.typ := vu.io.dmem_req.bits.typ
     storegen.io.din := vu.io.dmem_req.bits.data
-    arb.io.requestor(DMEM_VU).req_val := vu.io.dmem_req.valid
-    arb.io.requestor(DMEM_VU).req_kill := vu.io.dmem_req.bits.kill
-    arb.io.requestor(DMEM_VU).req_cmd := vu.io.dmem_req.bits.cmd
-    arb.io.requestor(DMEM_VU).req_type := vu.io.dmem_req.bits.typ
-    arb.io.requestor(DMEM_VU).req_idx := vu.io.dmem_req.bits.idx
-    arb.io.requestor(DMEM_VU).req_ppn := Reg(vu.io.dmem_req.bits.ppn)
-    arb.io.requestor(DMEM_VU).req_data := Reg(storegen.io.dout)
-    arb.io.requestor(DMEM_VU).req_tag := vu.io.dmem_req.bits.tag
-    vu.io.dmem_req.ready := arb.io.requestor(DMEM_VU).req_rdy
-    vu.io.dmem_resp.valid := Reg(arb.io.requestor(DMEM_VU).resp_val)
-    vu.io.dmem_resp.bits.nack := arb.io.requestor(DMEM_VU).resp_nack
-    vu.io.dmem_resp.bits.data := arb.io.requestor(DMEM_VU).resp_data_subword
-    vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DMEM_VU).resp_tag)
-    vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DMEM_VU).resp_type)
+    arb.io.requestor(DMEM_VU).req.valid := vu.io.dmem_req.valid
+    arb.io.requestor(DMEM_VU).req.bits.kill := vu.io.dmem_req.bits.kill
+    arb.io.requestor(DMEM_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd
+    arb.io.requestor(DMEM_VU).req.bits.typ := vu.io.dmem_req.bits.typ
+    arb.io.requestor(DMEM_VU).req.bits.idx := vu.io.dmem_req.bits.idx
+    arb.io.requestor(DMEM_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn)
+    arb.io.requestor(DMEM_VU).req.bits.data := Reg(storegen.io.dout)
+    arb.io.requestor(DMEM_VU).req.bits.tag := vu.io.dmem_req.bits.tag
+    vu.io.dmem_req.ready := arb.io.requestor(DMEM_VU).req.ready
+    vu.io.dmem_resp.valid := Reg(arb.io.requestor(DMEM_VU).resp.valid)
+    vu.io.dmem_resp.bits.nack := arb.io.requestor(DMEM_VU).resp.bits.nack
+    vu.io.dmem_resp.bits.data := arb.io.requestor(DMEM_VU).resp.bits.data_subword
+    vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DMEM_VU).resp.bits.tag)
+    vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DMEM_VU).resp.bits.typ)
     // share vector integer multiplier with rocket
     dpath.io.vec_imul_req <> vu.io.cp_imul_req
@@ -233,7 +243,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
   }
   else
   {
-    arb.io.requestor(DMEM_VU).req_val := Bool(false)
+    arb.io.requestor(DMEM_VU).req.valid := Bool(false)
     if (HAVE_FPU)
     {
      fpu.io.sfma.valid := Bool(false)
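The //TODO: views on nested bundles? hunk above is the one place the refactor still wires fields by hand: ctrl owns the request handshake, kill, cmd and typ, while dpath owns idx, tag and data, so neither module can bulk-connect the whole req sub-bundle. For a sense of the resulting usage pattern, here is a hypothetical minimal client of the new port (lowercase names other than the bundle fields are invented for illustration), closely following how the page table walker drives its port later in this commit:

// Sketch only: issue a doubleword load over ioHellaCache and capture the reply.
val dmem = new ioHellaCache
dmem.req.valid     := want_load             // hold until req.ready is observed
dmem.req.bits.cmd  := M_XRD                 // memory read
dmem.req.bits.typ  := MT_D                  // doubleword
dmem.req.bits.idx  := addr(PGIDX_BITS-1,0)  // untranslated page-offset bits
dmem.req.bits.ppn  := Reg(translated_ppn)   // physical page number, one cycle later
dmem.req.bits.kill := Bool(false)           // no late squash
when (dmem.resp.valid) { result := dmem.resp.bits.data_subword }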

View File

@@ -76,7 +76,7 @@ class ioCtrlAll extends Bundle()
 {
   val dpath = new ioCtrlDpath();
   val imem = new ioImem(List("req_val", "resp_val")).flip
-  val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack", "xcpt_ma_ld", "xcpt_ma_st")).flip
+  val dmem = new ioHellaCache
   val dtlb_val = Bool(OUTPUT);
   val dtlb_kill = Bool(OUTPUT);
   val dtlb_rdy = Bool(INPUT);
@@ -351,7 +351,7 @@ class rocketCtrl extends Component
   val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27));
   val wb_reg_div_mul_val = Reg(resetVal = Bool(false))
-  val wb_reg_dcache_miss = Reg(io.dmem.resp_miss || io.dmem.resp_nack, resetVal = Bool(false));
+  val wb_reg_dcache_miss = Reg(io.dmem.resp.bits.miss || io.dmem.resp.bits.nack, resetVal = Bool(false));
   val id_reg_valid = Reg(resetVal = Bool(false));
   val id_reg_btb_hit = Reg(resetVal = Bool(false));
@@ -681,8 +681,8 @@ class rocketCtrl extends Component
   }
   // exception handling
-  val mem_xcpt_ma_ld = io.dmem.xcpt_ma_ld && !mem_reg_kill
-  val mem_xcpt_ma_st = io.dmem.xcpt_ma_st && !mem_reg_kill
+  val mem_xcpt_ma_ld = io.dmem.xcpt.ma.ld && !mem_reg_kill
+  val mem_xcpt_ma_st = io.dmem.xcpt.ma.st && !mem_reg_kill
   val mem_xcpt_dtlb_ld = io.xcpt_dtlb_ld && !mem_reg_kill
   val mem_xcpt_dtlb_st = io.xcpt_dtlb_st && !mem_reg_kill
@@ -722,7 +722,7 @@ class rocketCtrl extends Component
   // replay mem stage PC on a DTLB miss or a long-latency writeback
   val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val
-  val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp_nack)
+  val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack)
   val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem
   val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem
   val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill || fpu_kill_mem
@@ -731,7 +731,7 @@ class rocketCtrl extends Component
   // replay execute stage PC when the D$ is blocked, when the D$ misses,
   // for privileged instructions, and for fence.i instructions
   val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst ||
-                  ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) ||
+                  ex_reg_replay || ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) ||
                   ex_reg_div_val && !io.dpath.div_rdy ||
                   ex_reg_mul_val && !io.dpath.mul_rdy
   val kill_ex = take_pc_wb || replay_ex
@@ -817,8 +817,8 @@ class rocketCtrl extends Component
     id_ex_hazard || id_mem_hazard || id_wb_hazard ||
     id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr ||
     id_fp_val && id_stall_fpu ||
-    id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) ||
-    ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy ||
+    id_mem_val.toBool && !(io.dmem.req.ready && io.dtlb_rdy) ||
+    ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req.ready ||
     vec_stalld
   );
   val ctrl_stallf = ctrl_stalld;
@@ -861,10 +861,10 @@ class rocketCtrl extends Component
   io.fpu.killx := kill_ex
   io.fpu.killm := kill_mem
   io.dtlb_val := ex_reg_mem_val
-  io.dtlb_kill := mem_reg_kill;
-  io.dmem.req_val := ex_reg_mem_val
-  io.dmem.req_kill := kill_dcache;
-  io.dmem.req_cmd := ex_reg_mem_cmd;
-  io.dmem.req_type := ex_reg_mem_type;
+  io.dtlb_kill := mem_reg_kill
+  io.dmem.req.valid := ex_reg_mem_val
+  io.dmem.req.bits.kill := kill_dcache
+  io.dmem.req.bits.cmd := ex_reg_mem_cmd
+  io.dmem.req.bits.typ := ex_reg_mem_type
 }

View File

@@ -17,7 +17,7 @@ class ioDpathAll extends Bundle()
 {
   val host = new ioHTIF();
   val ctrl = new ioCtrlDpath().flip
-  val dmem = new ioDmem(List("req_idx", "req_tag", "req_data", "resp_val", "resp_miss", "resp_replay", "resp_type", "resp_tag", "resp_data", "resp_data_subword")).flip
+  val dmem = new ioHellaCache
   val dtlb = new ioDTLB_CPU_req_bundle().asOutput()
   val imem = new ioDpathImem();
   val ptbr_wen = Bool(OUTPUT);
@@ -274,9 +274,9 @@ class rocketDpath extends Component
   // D$ request interface (registered inside D$ module)
   // other signals (req_val, req_rdy) connect to control module
-  io.dmem.req_idx := ex_effective_address
-  io.dmem.req_data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2)
-  io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val)
+  io.dmem.req.bits.idx := ex_effective_address
+  io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2)
+  io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val)
   io.dtlb.vpn := ex_effective_address >> UFix(PGIDX_BITS)
   // processor control regfile read
@@ -332,13 +332,13 @@ class rocketDpath extends Component
   // 32/64 bit load handling (moved to earlier in file)
   // writeback arbitration
-  val dmem_resp_xpu = !io.dmem.resp_tag(0).toBool
-  val dmem_resp_fpu = io.dmem.resp_tag(0).toBool
-  val dmem_resp_waddr = io.dmem.resp_tag.toUFix >> UFix(1)
-  dmem_resp_replay := io.dmem.resp_replay && dmem_resp_xpu;
+  val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
+  val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
+  val dmem_resp_waddr = io.dmem.resp.bits.tag.toUFix >> UFix(1)
+  dmem_resp_replay := io.dmem.resp.bits.replay && dmem_resp_xpu;
   r_dmem_resp_replay := dmem_resp_replay
   r_dmem_resp_waddr := dmem_resp_waddr
-  r_dmem_fp_replay := io.dmem.resp_replay && dmem_resp_fpu;
+  r_dmem_fp_replay := io.dmem.resp.bits.replay && dmem_resp_fpu;
   val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr,
                      Mux(div.io.resp_val, div.io.resp_tag,
@@ -350,9 +350,9 @@ class rocketDpath extends Component
                      mem_reg_wdata)))
   val mem_ll_wb = dmem_resp_replay || div.io.resp_val || mul_io.resp_val
-  io.fpu.dmem_resp_val := io.dmem.resp_val && dmem_resp_fpu
-  io.fpu.dmem_resp_data := io.dmem.resp_data
-  io.fpu.dmem_resp_type := io.dmem.resp_type
+  io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu
+  io.fpu.dmem_resp_data := io.dmem.resp.bits.data
+  io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
   io.fpu.dmem_resp_tag := dmem_resp_waddr
   // writeback stage
@@ -362,7 +362,7 @@ class rocketDpath extends Component
   wb_reg_rs2 := mem_reg_rs2
   wb_reg_waddr := mem_ll_waddr
   wb_reg_wdata := mem_ll_wdata
-  wb_reg_dmem_wdata := io.dmem.resp_data
+  wb_reg_dmem_wdata := io.dmem.resp.bits.data
   wb_reg_vec_waddr := mem_reg_waddr
   wb_reg_vec_wdata := mem_reg_wdata
   wb_reg_raddr1 := mem_reg_raddr1
@@ -395,7 +395,7 @@ class rocketDpath extends Component
     wb_wdata :=
       Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl),
-      Mux(wb_src_dmem, io.dmem.resp_data_subword,
+      Mux(wb_src_dmem, io.dmem.resp.bits.data_subword,
       wb_reg_wdata))
   }
   else
@@ -406,7 +406,7 @@ class rocketDpath extends Component
     pcr.io.vec_nfregs := UFix(0)
     wb_wdata :=
-      Mux(wb_src_dmem, io.dmem.resp_data_subword,
+      Mux(wb_src_dmem, io.dmem.resp.bits.data_subword,
      wb_reg_wdata)
   }

View File

@@ -716,6 +716,41 @@ class AMOALU extends Component {
   io.out := Mux(word, Cat(out(31,0), out(31,0)).toUFix, out)
 }
+class HellaCacheReq extends Bundle {
+  val cmd = Bits(width = 4)
+  val typ = Bits(width = 3)
+  val idx = Bits(width = PGIDX_BITS)
+  val ppn = Bits(width = PPN_BITS)
+  val data = Bits(width = 64)
+  val kill = Bool()
+  val tag = Bits(width = DCACHE_TAG_BITS)
+}
+class HellaCacheResp extends Bundle {
+  val miss = Bool()
+  val nack = Bool()
+  val replay = Bool()
+  val typ = Bits(width = 3)
+  val data = Bits(width = 64)
+  val data_subword = Bits(width = 64)
+  val tag = Bits(width = DCACHE_TAG_BITS)
+}
+class AlignmentExceptions extends Bundle {
+  val ld = Bool()
+  val st = Bool()
+}
+class HellaCacheExceptions extends Bundle {
+  val ma = new AlignmentExceptions
+}
+class ioHellaCache extends Bundle {
+  val req = (new ioDecoupled){ new HellaCacheReq }
+  val resp = (new ioPipe){ new HellaCacheResp }.flip
+  val xcpt = (new HellaCacheExceptions).asInput
+}
 // interface between D$ and processor/DTLB
 class ioDmem(view: List[String] = null) extends Bundle(view) {
   val req_kill = Bool(INPUT);
@@ -741,7 +776,7 @@ class ioDmem(view: List[String] = null) extends Bundle(view) {
 class HellaCache(co: CoherencePolicy) extends Component {
   val io = new Bundle {
-    val cpu = new ioDmem()
+    val cpu = (new ioHellaCache).flip
     val mem = new ioTileLink
   }
@@ -759,8 +794,8 @@ class HellaCache(co: CoherencePolicy) extends Component {
   val ramindexlsb = log2up(MEM_DATA_BITS/8)
   val early_nack = Reg { Bool() }
-  val r_cpu_req_val_ = Reg(io.cpu.req_val && io.cpu.req_rdy, resetVal = Bool(false))
-  val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req_kill && !early_nack
+  val r_cpu_req_val_ = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false))
+  val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack
   val r_cpu_req_idx = Reg() { Bits() }
   val r_cpu_req_cmd = Reg() { Bits() }
   val r_cpu_req_type = Reg() { Bits() }
@@ -776,9 +811,9 @@ class HellaCache(co: CoherencePolicy) extends Component {
   val p_store_way_oh = Reg() { Bits() }
   val r_replay_amo = Reg(resetVal = Bool(false))
-  val req_store = (io.cpu.req_cmd === M_XWR)
-  val req_load = (io.cpu.req_cmd === M_XRD)
-  val req_amo = io.cpu.req_cmd(3).toBool
+  val req_store = (io.cpu.req.bits.cmd === M_XWR)
+  val req_load = (io.cpu.req.bits.cmd === M_XRD)
+  val req_amo = io.cpu.req.bits.cmd(3).toBool
   val req_read = req_load || req_amo
   val req_write = req_store || req_amo
   val r_req_load = (r_cpu_req_cmd === M_XRD)
@@ -804,11 +839,11 @@ class HellaCache(co: CoherencePolicy) extends Component {
   flusher.io.req.valid := r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && !flushed
   flusher.io.mshr_req.ready := mshr.io.req.ready
-  when (io.cpu.req_val) {
-    r_cpu_req_idx := io.cpu.req_idx
-    r_cpu_req_cmd := io.cpu.req_cmd
-    r_cpu_req_type := io.cpu.req_type
-    r_cpu_req_tag := io.cpu.req_tag
+  when (io.cpu.req.valid) {
+    r_cpu_req_idx := io.cpu.req.bits.idx
+    r_cpu_req_cmd := io.cpu.req.bits.cmd
+    r_cpu_req_type := io.cpu.req.bits.typ
+    r_cpu_req_tag := io.cpu.req.bits.tag
   }
   when (prober.io.meta_req.valid) {
     r_cpu_req_idx := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, mshr.io.data_req.bits.offset)(PGIDX_BITS-1,0)
@@ -825,15 +860,15 @@ class HellaCache(co: CoherencePolicy) extends Component {
     r_cpu_req_cmd := M_FLA
     r_way_oh := flusher.io.meta_req.bits.way_en
   }
-  val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data)
+  val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req.bits.data)
   val misaligned =
     (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) ||
     (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) ||
    ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0)));
-  io.cpu.xcpt_ma_ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned
-  io.cpu.xcpt_ma_st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned
+  io.cpu.xcpt.ma.ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned
+  io.cpu.xcpt.ma.st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned
   // tags
   val meta = new MetaDataArrayArray(lines)
@@ -847,12 +882,12 @@ class HellaCache(co: CoherencePolicy) extends Component {
   data_arb.io.out <> data.io.req
   // cpu tag check
-  meta_arb.io.in(3).valid := io.cpu.req_val
-  meta_arb.io.in(3).bits.idx := io.cpu.req_idx(indexmsb,indexlsb)
+  meta_arb.io.in(3).valid := io.cpu.req.valid
+  meta_arb.io.in(3).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb)
   meta_arb.io.in(3).bits.rw := Bool(false)
   meta_arb.io.in(3).bits.way_en := ~UFix(0, NWAYS)
   val early_tag_nack = !meta_arb.io.in(3).ready
-  val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.address >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req_ppn)
+  val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.address >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req.bits.ppn)
   val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb)
   val tag_match_arr = (0 until NWAYS).map( w => co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag))
   val tag_match = Cat(Bits(0),tag_match_arr:_*).orR
@@ -886,10 +921,10 @@ class HellaCache(co: CoherencePolicy) extends Component {
   data_arb.io.in(0).valid := io.mem.xact_rep.valid && co.messageUpdatesDataArray(io.mem.xact_rep.bits)
   // load hits
-  data_arb.io.in(4).bits.offset := io.cpu.req_idx(offsetmsb,ramindexlsb)
-  data_arb.io.in(4).bits.idx := io.cpu.req_idx(indexmsb,indexlsb)
+  data_arb.io.in(4).bits.offset := io.cpu.req.bits.idx(offsetmsb,ramindexlsb)
+  data_arb.io.in(4).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb)
   data_arb.io.in(4).bits.rw := Bool(false)
-  data_arb.io.in(4).valid := io.cpu.req_val && req_read
+  data_arb.io.in(4).valid := io.cpu.req.valid && req_read
   data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check
   val early_load_nack = req_read && !data_arb.io.in(4).ready
@@ -900,7 +935,7 @@ class HellaCache(co: CoherencePolicy) extends Component {
   val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb))
   val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb))
   val p_store_match = r_cpu_req_val_ && r_req_read && p_store_idx_match && p_store_offset_match
-  val drain_store_val = (p_store_valid && (!io.cpu.req_val || req_write || wb.io.data_req.valid || mshr.io.data_req.valid)) || p_store_match
+  val drain_store_val = (p_store_valid && (!io.cpu.req.valid || req_write || wb.io.data_req.valid || mshr.io.data_req.valid)) || p_store_match
   data_arb.io.in(2).bits.offset := p_store_idx(offsetmsb,ramindexlsb)
   data_arb.io.in(2).bits.idx := p_store_idx(indexmsb,indexlsb)
   data_arb.io.in(2).bits.rw := Bool(true)
@@ -1020,15 +1055,15 @@ class HellaCache(co: CoherencePolicy) extends Component {
                    !flushed && r_req_flush
   val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush
-  io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence
-  io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack
-  io.cpu.resp_val := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val
-  io.cpu.resp_replay := mshr.io.cpu_resp_val
-  io.cpu.resp_miss := r_cpu_req_val_ && (!tag_hit || mshr.io.secondary_miss) && r_req_read
-  io.cpu.resp_tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag)
-  io.cpu.resp_type := loadgen.io.typ
-  io.cpu.resp_data := loadgen.io.dout
-  io.cpu.resp_data_subword := loadgen.io.r_dout_subword
+  io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence
+  io.cpu.resp.valid := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val
+  io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack
+  io.cpu.resp.bits.replay := mshr.io.cpu_resp_val
+  io.cpu.resp.bits.miss := r_cpu_req_val_ && (!tag_hit || mshr.io.secondary_miss) && r_req_read
+  io.cpu.resp.bits.tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag)
+  io.cpu.resp.bits.typ := loadgen.io.typ
+  io.cpu.resp.bits.data := loadgen.io.dout
+  io.cpu.resp.bits.data_subword := loadgen.io.r_dout_subword
   val xact_init_arb = (new Arbiter(2)) { new TransactionInit }
   xact_init_arb.io.in(0) <> wb.io.mem_req

View File

@@ -5,81 +5,84 @@ import Node._;
 import Constants._;
 import scala.math._;
-class ioDmemArbiter(n: Int) extends Bundle
+class ioHellaCacheArbiter(n: Int) extends Bundle
 {
-  val dmem = new ioDmem().flip
-  val requestor = Vec(n) { new ioDmem() }
+  val requestor = Vec(n) { new ioHellaCache() }.flip
+  val mem = new ioHellaCache
 }
-class rocketDmemArbiter(n: Int) extends Component
+class rocketHellaCacheArbiter(n: Int) extends Component
 {
-  val io = new ioDmemArbiter(n)
+  val io = new ioHellaCacheArbiter(n)
   require(DCACHE_TAG_BITS >= log2up(n) + CPU_TAG_BITS)
   var req_val = Bool(false)
-  var req_rdy = io.dmem.req_rdy
+  var req_rdy = io.mem.req.ready
   for (i <- 0 until n)
   {
-    io.requestor(i).req_rdy := req_rdy
-    req_val = req_val || io.requestor(i).req_val
-    req_rdy = req_rdy && !io.requestor(i).req_val
+    io.requestor(i).req.ready := req_rdy
+    req_val = req_val || io.requestor(i).req.valid
+    req_rdy = req_rdy && !io.requestor(i).req.valid
   }
-  var req_cmd = io.requestor(n-1).req_cmd
-  var req_type = io.requestor(n-1).req_type
-  var req_idx = io.requestor(n-1).req_idx
-  var req_ppn = io.requestor(n-1).req_ppn
-  var req_data = io.requestor(n-1).req_data
-  var req_tag = io.requestor(n-1).req_tag
-  var req_kill = io.requestor(n-1).req_kill
+  var req_cmd = io.requestor(n-1).req.bits.cmd
+  var req_type = io.requestor(n-1).req.bits.typ
+  var req_idx = io.requestor(n-1).req.bits.idx
+  var req_ppn = io.requestor(n-1).req.bits.ppn
+  var req_data = io.requestor(n-1).req.bits.data
+  var req_kill = io.requestor(n-1).req.bits.kill
+  var req_tag = io.requestor(n-1).req.bits.tag
   for (i <- n-1 to 0 by -1)
   {
-    req_cmd = Mux(io.requestor(i).req_val, io.requestor(i).req_cmd, req_cmd)
-    req_type = Mux(io.requestor(i).req_val, io.requestor(i).req_type, req_type)
-    req_idx = Mux(io.requestor(i).req_val, io.requestor(i).req_idx, req_idx)
-    req_ppn = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_ppn, req_ppn)
-    req_data = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_data, req_data)
-    req_tag = Mux(io.requestor(i).req_val, Cat(io.requestor(i).req_tag, UFix(i, log2up(n))), req_tag)
-    req_kill = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_kill, req_kill)
+    val r = io.requestor(i).req
+    req_cmd = Mux(r.valid, r.bits.cmd, req_cmd)
+    req_type = Mux(r.valid, r.bits.typ, req_type)
+    req_idx = Mux(r.valid, r.bits.idx, req_idx)
+    req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn)
+    req_data = Mux(Reg(r.valid), r.bits.data, req_data)
+    req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill)
+    req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2up(n))), req_tag)
   }
-  io.dmem.req_val := req_val
-  io.dmem.req_cmd := req_cmd
-  io.dmem.req_type := req_type
-  io.dmem.req_idx := req_idx
-  io.dmem.req_ppn := req_ppn
-  io.dmem.req_data := req_data
-  io.dmem.req_tag := req_tag
-  io.dmem.req_kill := req_kill
+  io.mem.req.valid := req_val
+  io.mem.req.bits.cmd := req_cmd
+  io.mem.req.bits.typ := req_type
+  io.mem.req.bits.idx := req_idx
+  io.mem.req.bits.ppn := req_ppn
+  io.mem.req.bits.data := req_data
+  io.mem.req.bits.kill := req_kill
+  io.mem.req.bits.tag := req_tag
   for (i <- 0 until n)
   {
-    val tag_hit = io.dmem.resp_tag(log2up(n)-1,0) === UFix(i)
-    io.requestor(i).xcpt_ma_ld := io.dmem.xcpt_ma_ld && Reg(io.requestor(i).req_val)
-    io.requestor(i).xcpt_ma_st := io.dmem.xcpt_ma_st && Reg(io.requestor(i).req_val)
-    io.requestor(i).resp_nack := io.dmem.resp_nack && Reg(io.requestor(i).req_val)
-    io.requestor(i).resp_miss := io.dmem.resp_miss && tag_hit
-    io.requestor(i).resp_val := io.dmem.resp_val && tag_hit
-    io.requestor(i).resp_replay := io.dmem.resp_replay && tag_hit
-    io.requestor(i).resp_data := io.dmem.resp_data
-    io.requestor(i).resp_data_subword := io.dmem.resp_data_subword
-    io.requestor(i).resp_type := io.dmem.resp_type
-    io.requestor(i).resp_tag := io.dmem.resp_tag >> UFix(log2up(n))
+    val r = io.requestor(i).resp
+    val x = io.requestor(i).xcpt
+    val tag_hit = io.mem.resp.bits.tag(log2up(n)-1,0) === UFix(i)
+    x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid)
+    x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid)
+    r.valid := io.mem.resp.valid && tag_hit
+    r.bits.miss := io.mem.resp.bits.miss && tag_hit
+    r.bits.nack := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid)
+    r.bits.replay := io.mem.resp.bits.replay && tag_hit
+    r.bits.data := io.mem.resp.bits.data
+    r.bits.data_subword := io.mem.resp.bits.data_subword
+    r.bits.typ := io.mem.resp.bits.typ
+    r.bits.tag := io.mem.resp.bits.tag >> UFix(log2up(n))
   }
 }
 class ioPTW extends Bundle
 {
-  val itlb = new ioTLB_PTW().flip
-  val dtlb = new ioTLB_PTW().flip
-  val vitlb = new ioTLB_PTW().flip
-  val dmem = new ioDmem().flip
-  val ptbr = UFix(PADDR_BITS, INPUT);
+  val itlb = (new ioTLB_PTW).flip
+  val dtlb = (new ioTLB_PTW).flip
+  val vitlb = (new ioTLB_PTW).flip
+  val mem = new ioHellaCache
+  val ptbr = UFix(PADDR_BITS, INPUT)
 }
 class rocketPTW extends Component
 {
-  val io = new ioPTW();
+  val io = new ioPTW
   val levels = 3
   val bitsPerLevel = VPN_BITS/levels
@@ -123,25 +126,25 @@ class rocketPTW extends Component
     req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.vitlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3))
   }
-  val dmem_resp_val = Reg(io.dmem.resp_val, resetVal = Bool(false))
+  val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false))
   when (dmem_resp_val) {
-    req_addr := Cat(io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3))
-    r_resp_perm := io.dmem.resp_data_subword(9,4);
-    r_resp_ppn := io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS);
+    req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3))
+    r_resp_perm := io.mem.resp.bits.data_subword(9,4);
+    r_resp_ppn := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS);
   }
-  io.dmem.req_val := state === s_req
-  io.dmem.req_cmd := M_XRD;
-  io.dmem.req_type := MT_D;
-  io.dmem.req_idx := req_addr(PGIDX_BITS-1,0);
-  io.dmem.req_ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS))
-  io.dmem.req_kill := Bool(false)
+  io.mem.req.valid := state === s_req
+  io.mem.req.bits.cmd := M_XRD
+  io.mem.req.bits.typ := MT_D
+  io.mem.req.bits.idx := req_addr(PGIDX_BITS-1,0)
+  io.mem.req.bits.ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS))
+  io.mem.req.bits.kill := Bool(false)
   val resp_val = state === s_done
   val resp_err = state === s_error
-  val resp_ptd = io.dmem.resp_data_subword(1,0) === Bits(1)
-  val resp_pte = io.dmem.resp_data_subword(1,0) === Bits(2)
+  val resp_ptd = io.mem.resp.bits.data_subword(1,0) === Bits(1)
+  val resp_pte = io.mem.resp.bits.data_subword(1,0) === Bits(2)
   io.itlb.req_rdy := (state === s_ready)
   io.dtlb.req_rdy := (state === s_ready) && !io.itlb.req_val
@@ -172,12 +175,12 @@ class rocketPTW extends Component
       count := UFix(0)
     }
     is (s_req) {
-      when (io.dmem.req_rdy) {
+      when (io.mem.req.ready) {
        state := s_wait;
      }
    }
    is (s_wait) {
-      when (io.dmem.resp_nack) {
+      when (io.mem.resp.bits.nack) {
        state := s_req
      }
      when (dmem_resp_val) {
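A note on response routing in the reworked arbiter: the low log2up(n) bits of the tag presented to the cache encode the requestor index, which is what the require(DCACHE_TAG_BITS >= log2up(n) + CPU_TAG_BITS) above guarantees room for. The encode/decode pair, condensed from rocketHellaCacheArbiter:

// Request path: append the port index below the client's own tag.
req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2up(n))), req_tag)
// Response path: demux on the low tag bits, then strip them before
// handing the tag back to the matching client.
val tag_hit = io.mem.resp.bits.tag(log2up(n)-1,0) === UFix(i)
r.bits.tag := io.mem.resp.bits.tag >> UFix(log2up(n))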