remove more global consts; refactor DTLBs
The D$ now contains the DTLB. Requestors provide the full virtual address (VAddr) with the initial request. The VU now has its own DTLBs.
This commit is contained in:
parent
e76892f758
commit
4d1ca8ba3a
@ -12,58 +12,40 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp
|
||||
val mem = new ioHellaCache()(conf.dcache)
|
||||
}
|
||||
|
||||
var req_val = Bool(false)
|
||||
var req_rdy = io.mem.req.ready
|
||||
for (i <- 0 until n)
|
||||
{
|
||||
io.requestor(i).req.ready := req_rdy
|
||||
req_val = req_val || io.requestor(i).req.valid
|
||||
req_rdy = req_rdy && !io.requestor(i).req.valid
|
||||
val r_valid = io.requestor.map(r => Reg(r.req.valid))
|
||||
|
||||
io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_)
|
||||
io.requestor(0).req.ready := io.mem.req.ready
|
||||
for (i <- 1 until n)
|
||||
io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid
|
||||
|
||||
io.mem.req.bits := io.requestor(n-1).req.bits
|
||||
io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UFix(n-1, log2Up(n)))
|
||||
for (i <- n-2 to 0 by -1) {
|
||||
val req = io.requestor(i).req
|
||||
when (req.valid) {
|
||||
io.mem.req.bits.cmd := req.bits.cmd
|
||||
io.mem.req.bits.typ := req.bits.typ
|
||||
io.mem.req.bits.addr := req.bits.addr
|
||||
io.mem.req.bits.phys := req.bits.phys
|
||||
io.mem.req.bits.tag := Cat(req.bits.tag, UFix(i, log2Up(n)))
|
||||
}
|
||||
when (r_valid(i)) {
|
||||
io.mem.req.bits.kill := req.bits.kill
|
||||
io.mem.req.bits.data := req.bits.data
|
||||
}
|
||||
}
|
||||
|
||||
var req_cmd = io.requestor(n-1).req.bits.cmd
|
||||
var req_type = io.requestor(n-1).req.bits.typ
|
||||
var req_idx = io.requestor(n-1).req.bits.idx
|
||||
var req_ppn = io.requestor(n-1).req.bits.ppn
|
||||
var req_data = io.requestor(n-1).req.bits.data
|
||||
var req_kill = io.requestor(n-1).req.bits.kill
|
||||
var req_tag = io.requestor(n-1).req.bits.tag
|
||||
for (i <- n-1 to 0 by -1)
|
||||
{
|
||||
val r = io.requestor(i).req
|
||||
req_cmd = Mux(r.valid, r.bits.cmd, req_cmd)
|
||||
req_type = Mux(r.valid, r.bits.typ, req_type)
|
||||
req_idx = Mux(r.valid, r.bits.idx, req_idx)
|
||||
req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn)
|
||||
req_data = Mux(Reg(r.valid), r.bits.data, req_data)
|
||||
req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill)
|
||||
req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2Up(n))), req_tag)
|
||||
}
|
||||
|
||||
io.mem.req.valid := req_val
|
||||
io.mem.req.bits.cmd := req_cmd
|
||||
io.mem.req.bits.typ := req_type
|
||||
io.mem.req.bits.idx := req_idx
|
||||
io.mem.req.bits.ppn := req_ppn
|
||||
io.mem.req.bits.data := req_data
|
||||
io.mem.req.bits.kill := req_kill
|
||||
io.mem.req.bits.tag := req_tag
|
||||
|
||||
for (i <- 0 until n)
|
||||
{
|
||||
val r = io.requestor(i).resp
|
||||
val x = io.requestor(i).xcpt
|
||||
for (i <- 0 until n) {
|
||||
val resp = io.requestor(i).resp
|
||||
val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i)
|
||||
x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid)
|
||||
x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid)
|
||||
r.valid := io.mem.resp.valid && tag_hit
|
||||
r.bits.miss := io.mem.resp.bits.miss && tag_hit
|
||||
r.bits.nack := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid)
|
||||
r.bits.replay := io.mem.resp.bits.replay && tag_hit
|
||||
r.bits.data := io.mem.resp.bits.data
|
||||
r.bits.data_subword := io.mem.resp.bits.data_subword
|
||||
r.bits.typ := io.mem.resp.bits.typ
|
||||
r.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n))
|
||||
resp.valid := io.mem.resp.valid && tag_hit
|
||||
io.requestor(i).xcpt := io.mem.xcpt
|
||||
resp.bits := io.mem.resp.bits
|
||||
resp.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n))
|
||||
resp.bits.miss := io.mem.resp.bits.miss && tag_hit
|
||||
resp.bits.nack := io.mem.resp.bits.nack && r_valid(i)
|
||||
resp.bits.replay := io.mem.resp.bits.replay && tag_hit
|
||||
}
|
||||
}
|
||||
|
||||
@ -80,15 +62,6 @@ class MemArbiter(n: Int) extends Component {
|
||||
val requestor = Vec(n) { new ioUncachedRequestor }.flip
|
||||
}
|
||||
|
||||
var xi_val = Bool(false)
|
||||
var xi_rdy = io.mem.xact_init.ready
|
||||
for (i <- 0 until n)
|
||||
{
|
||||
io.requestor(i).xact_init.ready := xi_rdy
|
||||
xi_val = xi_val || io.requestor(i).xact_init.valid
|
||||
xi_rdy = xi_rdy && !io.requestor(i).xact_init.valid
|
||||
}
|
||||
|
||||
var xi_bits = new TransactionInit
|
||||
xi_bits := io.requestor(n-1).xact_init.bits
|
||||
xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2Up(n)))
|
||||
@ -101,24 +74,21 @@ class MemArbiter(n: Int) extends Component {
|
||||
xi_bits = Mux(io.requestor(i).xact_init.valid, my_xi_bits, xi_bits)
|
||||
}
|
||||
|
||||
io.mem.xact_init.valid := xi_val
|
||||
io.mem.xact_init.bits := xi_bits
|
||||
|
||||
var xf_val = Bool(false)
|
||||
var xf_rdy = io.mem.xact_finish.ready
|
||||
for (i <- 0 until n)
|
||||
{
|
||||
io.requestor(i).xact_finish.ready := xf_rdy
|
||||
xf_val = xf_val || io.requestor(i).xact_finish.valid
|
||||
xf_rdy = xf_rdy && !io.requestor(i).xact_finish.valid
|
||||
}
|
||||
io.mem.xact_init.valid := io.requestor.map(_.xact_init.valid).reduce(_||_)
|
||||
io.requestor(0).xact_init.ready := io.mem.xact_init.ready
|
||||
for (i <- 1 until n)
|
||||
io.requestor(i).xact_init.ready := io.requestor(i-1).xact_init.ready && !io.requestor(i-1).xact_init.valid
|
||||
|
||||
var xf_bits = io.requestor(n-1).xact_finish.bits
|
||||
for (i <- n-2 to 0 by -1)
|
||||
xf_bits = Mux(io.requestor(i).xact_finish.valid, io.requestor(i).xact_finish.bits, xf_bits)
|
||||
|
||||
io.mem.xact_finish.valid := xf_val
|
||||
io.mem.xact_finish.bits := xf_bits
|
||||
io.mem.xact_finish.valid := io.requestor.map(_.xact_finish.valid).reduce(_||_)
|
||||
io.requestor(0).xact_finish.ready := io.mem.xact_finish.ready
|
||||
for (i <- 1 until n)
|
||||
io.requestor(i).xact_finish.ready := io.requestor(i-1).xact_finish.ready && !io.requestor(i-1).xact_finish.valid
|
||||
|
||||
for (i <- 0 until n)
|
||||
{
|
||||
|
@ -154,12 +154,7 @@ trait InterruptConstants {
|
||||
val IRQ_TIMER = 7
|
||||
}
|
||||
|
||||
abstract trait RocketDcacheConstants extends ArbiterConstants with uncore.constants.AddressConstants {
|
||||
val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses
|
||||
require(log2Up(NMSHR)+3 <= uncore.Constants.TILE_XACT_ID_BITS)
|
||||
val NRPQ = 16; // number of secondary misses
|
||||
val NSDQ = 17; // number of secondary stores/AMOs
|
||||
val OFFSET_BITS = 6; // log2(cache line size in bytes)
|
||||
abstract trait RocketDcacheConstants extends TileConfigConstants with uncore.constants.CacheConstants with uncore.constants.AddressConstants {
|
||||
require(OFFSET_BITS == log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES))
|
||||
require(OFFSET_BITS <= uncore.Constants.X_INIT_WRITE_MASK_BITS)
|
||||
require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS)
|
||||
@ -196,15 +191,3 @@ trait VectorOpConstants {
|
||||
val VIMM2_ALU = UFix(1, 1)
|
||||
val VIMM2_X = UFix(0, 1)
|
||||
}
|
||||
|
||||
abstract trait ArbiterConstants extends TileConfigConstants {
|
||||
val DCACHE_PORTS = 3
|
||||
val DCACHE_CPU = 0
|
||||
val DCACHE_PTW = 1
|
||||
val DCACHE_VU = 2
|
||||
|
||||
val DMEM_PORTS = if (HAVE_VEC) 3 else 2
|
||||
val DMEM_DCACHE = 0
|
||||
val DMEM_ICACHE = 1
|
||||
val DMEM_VICACHE = 2
|
||||
}
|
||||
|
@ -13,87 +13,57 @@ class ioRocket(implicit conf: RocketConfiguration) extends Bundle
|
||||
val dmem = new ioHellaCache()(conf.dcache)
|
||||
}
|
||||
|
||||
class rocketProc(implicit conf: RocketConfiguration) extends Component
|
||||
class Core(implicit conf: RocketConfiguration) extends Component
|
||||
{
|
||||
val io = new ioRocket
|
||||
|
||||
val ctrl = new Control
|
||||
val dpath = new Datapath
|
||||
|
||||
val ptw = Vec(0) { new IOTLBPTW }
|
||||
val arb = new HellaCacheArbiter(DCACHE_PORTS)
|
||||
|
||||
var vu: vu = null
|
||||
if (HAVE_VEC)
|
||||
{
|
||||
vu = new vu()
|
||||
|
||||
val vdtlb = new rocketTLB(8)
|
||||
vdtlb.io.invalidate := dpath.io.ptbr_wen
|
||||
vdtlb.io.status := dpath.io.ctrl.status
|
||||
ptw += vdtlb.io.ptw
|
||||
|
||||
vdtlb.io.cpu_req <> vu.io.vec_tlb_req
|
||||
vu.io.vec_tlb_resp := vdtlb.io.cpu_resp
|
||||
vu.io.vec_tlb_resp.xcpt_pf := Bool(false)
|
||||
|
||||
val pftlb = new rocketTLB(2)
|
||||
pftlb.io.invalidate := dpath.io.ptbr_wen
|
||||
pftlb.io.status := dpath.io.ctrl.status
|
||||
pftlb.io.cpu_req <> vu.io.vec_pftlb_req
|
||||
ptw += pftlb.io.ptw
|
||||
|
||||
vu.io.vec_pftlb_resp := pftlb.io.cpu_resp
|
||||
vu.io.vec_pftlb_resp.xcpt_ld := Bool(false)
|
||||
vu.io.vec_pftlb_resp.xcpt_st := Bool(false)
|
||||
}
|
||||
|
||||
// connect DTLB to ctrl+dpath
|
||||
val dtlb = new rocketTLB(DTLB_ENTRIES)
|
||||
dtlb.io.invalidate := dpath.io.ptbr_wen
|
||||
dtlb.io.status := dpath.io.ctrl.status
|
||||
ptw += dtlb.io.ptw
|
||||
|
||||
dtlb.io.cpu_req.valid := ctrl.io.dtlb_val
|
||||
dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill
|
||||
dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd
|
||||
dtlb.io.cpu_req.bits.asid := UFix(0)
|
||||
dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn
|
||||
ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld
|
||||
ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st
|
||||
ctrl.io.dtlb_rdy := dtlb.io.cpu_req.ready
|
||||
ctrl.io.dtlb_miss := dtlb.io.cpu_resp.miss
|
||||
|
||||
arb.io.requestor(DCACHE_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn
|
||||
|
||||
ctrl.io.dpath <> dpath.io.ctrl
|
||||
dpath.io.host <> io.host
|
||||
|
||||
ctrl.io.imem <> io.imem
|
||||
dpath.io.imem <> io.imem
|
||||
|
||||
ctrl.io.dmem <> arb.io.requestor(DCACHE_CPU)
|
||||
dpath.io.dmem <> arb.io.requestor(DCACHE_CPU)
|
||||
val dmemArb = new HellaCacheArbiter(if (HAVE_VEC) 3 else 2)
|
||||
dmemArb.io.mem <> io.dmem
|
||||
val dmem = dmemArb.io.requestor
|
||||
dmem(1) <> ctrl.io.dmem
|
||||
dmem(1) <> dpath.io.dmem
|
||||
|
||||
var fpu: rocketFPU = null
|
||||
if (HAVE_FPU)
|
||||
{
|
||||
fpu = new rocketFPU(4,6)
|
||||
val ptw = collection.mutable.ArrayBuffer(io.imem.ptw, io.dmem.ptw)
|
||||
|
||||
val fpu: FPU = if (HAVE_FPU) {
|
||||
val fpu = new FPU(4,6)
|
||||
dpath.io.fpu <> fpu.io.dpath
|
||||
ctrl.io.fpu <> fpu.io.ctrl
|
||||
}
|
||||
fpu
|
||||
} else null
|
||||
|
||||
if (HAVE_VEC) {
|
||||
val vu = new vu()
|
||||
|
||||
val vdtlb = new rocketTLB(8)
|
||||
ptw += vdtlb.io.ptw
|
||||
vdtlb.io.cpu_req <> vu.io.vec_tlb_req
|
||||
vu.io.vec_tlb_resp := vdtlb.io.cpu_resp
|
||||
vu.io.vec_tlb_resp.xcpt_pf := Bool(false)
|
||||
|
||||
val pftlb = new rocketTLB(2)
|
||||
pftlb.io.cpu_req <> vu.io.vec_pftlb_req
|
||||
ptw += pftlb.io.ptw
|
||||
vu.io.vec_pftlb_resp := pftlb.io.cpu_resp
|
||||
vu.io.vec_pftlb_resp.xcpt_ld := Bool(false)
|
||||
vu.io.vec_pftlb_resp.xcpt_st := Bool(false)
|
||||
|
||||
if (HAVE_VEC)
|
||||
{
|
||||
dpath.io.vec_ctrl <> ctrl.io.vec_dpath
|
||||
|
||||
// hooking up vector I$
|
||||
ptw += io.vimem.ptw
|
||||
io.vimem.req.bits.status := dpath.io.ctrl.status
|
||||
io.vimem.req.bits.pc := vu.io.imem_req.bits
|
||||
io.vimem.req.valid := vu.io.imem_req.valid
|
||||
io.vimem.req.bits.invalidate := ctrl.io.dpath.flush_inst
|
||||
io.vimem.req.bits.invalidateTLB := dpath.io.ptbr_wen
|
||||
vu.io.imem_resp.valid := io.vimem.resp.valid
|
||||
vu.io.imem_resp.bits.pc := io.vimem.resp.bits.pc
|
||||
vu.io.imem_resp.bits.data := io.vimem.resp.bits.data
|
||||
@ -155,21 +125,16 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component
|
||||
vu.io.xcpt.hold := ctrl.io.vec_iface.hold
|
||||
|
||||
// hooking up vector memory interface
|
||||
arb.io.requestor(DCACHE_VU).req.valid := vu.io.dmem_req.valid
|
||||
arb.io.requestor(DCACHE_VU).req.bits.kill := vu.io.dmem_req.bits.kill
|
||||
arb.io.requestor(DCACHE_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd
|
||||
arb.io.requestor(DCACHE_VU).req.bits.typ := vu.io.dmem_req.bits.typ
|
||||
arb.io.requestor(DCACHE_VU).req.bits.idx := vu.io.dmem_req.bits.idx
|
||||
arb.io.requestor(DCACHE_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn)
|
||||
arb.io.requestor(DCACHE_VU).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data)
|
||||
arb.io.requestor(DCACHE_VU).req.bits.tag := vu.io.dmem_req.bits.tag
|
||||
dmem(2).req.valid := vu.io.dmem_req.valid
|
||||
dmem(2).req.bits := vu.io.dmem_req.bits
|
||||
dmem(2).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data)
|
||||
|
||||
vu.io.dmem_req.ready := arb.io.requestor(DCACHE_VU).req.ready
|
||||
vu.io.dmem_resp.valid := Reg(arb.io.requestor(DCACHE_VU).resp.valid)
|
||||
vu.io.dmem_resp.bits.nack := arb.io.requestor(DCACHE_VU).resp.bits.nack
|
||||
vu.io.dmem_resp.bits.data := arb.io.requestor(DCACHE_VU).resp.bits.data_subword
|
||||
vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DCACHE_VU).resp.bits.tag)
|
||||
vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DCACHE_VU).resp.bits.typ)
|
||||
vu.io.dmem_req.ready := dmem(2).req.ready
|
||||
vu.io.dmem_resp.valid := Reg(dmem(2).resp.valid)
|
||||
vu.io.dmem_resp.bits.nack := dmem(2).resp.bits.nack
|
||||
vu.io.dmem_resp.bits.data := dmem(2).resp.bits.data_subword
|
||||
vu.io.dmem_resp.bits.tag := Reg(dmem(2).resp.bits.tag)
|
||||
vu.io.dmem_resp.bits.typ := Reg(dmem(2).resp.bits.typ)
|
||||
|
||||
// share vector integer multiplier with rocket
|
||||
dpath.io.vec_imul_req <> vu.io.cp_imul_req
|
||||
@ -178,22 +143,13 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component
|
||||
// share sfma and dfma pipelines with rocket
|
||||
fpu.io.sfma <> vu.io.cp_sfma
|
||||
fpu.io.dfma <> vu.io.cp_dfma
|
||||
}
|
||||
else
|
||||
{
|
||||
arb.io.requestor(DCACHE_VU).req.valid := Bool(false)
|
||||
if (HAVE_FPU)
|
||||
{
|
||||
fpu.io.sfma.valid := Bool(false)
|
||||
fpu.io.dfma.valid := Bool(false)
|
||||
}
|
||||
} else if (fpu != null) {
|
||||
fpu.io.sfma.valid := Bool(false)
|
||||
fpu.io.dfma.valid := Bool(false)
|
||||
}
|
||||
|
||||
ptw += io.imem.ptw
|
||||
val thePTW = new PTW(ptw.length)
|
||||
thePTW.io.requestor <> ptw
|
||||
thePTW.io.ptbr := dpath.io.ptbr;
|
||||
arb.io.requestor(DCACHE_PTW) <> thePTW.io.mem
|
||||
|
||||
arb.io.mem <> io.dmem
|
||||
ptw zip thePTW.io.requestor map { case (a, b) => a <> b }
|
||||
thePTW.io.dpath <> dpath.io.ptw
|
||||
dmem(0) <> thePTW.io.mem
|
||||
}
|
@ -569,8 +569,8 @@ class Control(implicit conf: RocketConfiguration) extends Component
|
||||
(mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
|
||||
(mem_reg_mem_val && io.dmem.xcpt.ma.ld, UFix( 8)),
|
||||
(mem_reg_mem_val && io.dmem.xcpt.ma.st, UFix( 9)),
|
||||
(mem_reg_mem_val && io.xcpt_dtlb_ld, UFix(10)),
|
||||
(mem_reg_mem_val && io.xcpt_dtlb_st, UFix(11))))
|
||||
(mem_reg_mem_val && io.dmem.xcpt.pf.ld, UFix(10)),
|
||||
(mem_reg_mem_val && io.dmem.xcpt.pf.st, UFix(11))))
|
||||
|
||||
wb_reg_xcpt := mem_xcpt && !take_pc_wb && !wb_reg_replay_next
|
||||
when (mem_xcpt) { wb_reg_cause := mem_cause }
|
||||
@ -644,7 +644,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
|
||||
|
||||
// replay inst in ex stage
|
||||
val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst ||
|
||||
ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) ||
|
||||
ex_reg_mem_val && !io.dmem.req.ready ||
|
||||
ex_reg_div_val && !io.dpath.div_rdy ||
|
||||
ex_reg_mul_val && !io.dpath.mul_rdy ||
|
||||
mem_reg_replay_next
|
||||
@ -652,7 +652,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
|
||||
|
||||
// replay inst in mem stage
|
||||
val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val
|
||||
val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack)
|
||||
val dmem_kill_mem = mem_reg_valid && io.dmem.resp.bits.nack
|
||||
val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem
|
||||
val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem
|
||||
val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_reg_xcpt || !mem_reg_valid
|
||||
@ -734,7 +734,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
|
||||
id_ex_hazard || id_mem_hazard || id_wb_hazard ||
|
||||
id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr ||
|
||||
id_fp_val && id_stall_fpu ||
|
||||
id_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) ||
|
||||
id_mem_val && !io.dmem.req.ready ||
|
||||
vec_stalld
|
||||
ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || id_interrupt
|
||||
|
||||
@ -772,10 +772,9 @@ class Control(implicit conf: RocketConfiguration) extends Component
|
||||
io.fpu.killx := ctrl_killx
|
||||
io.fpu.killm := killm_common
|
||||
|
||||
io.dtlb_val := ex_reg_mem_val
|
||||
io.dtlb_kill := !mem_reg_valid
|
||||
io.dmem.req.valid := ex_reg_mem_val
|
||||
io.dmem.req.bits.kill := killm_common || mem_xcpt || io.dtlb_miss
|
||||
io.dmem.req.bits.kill := killm_common || mem_xcpt
|
||||
io.dmem.req.bits.cmd := ex_reg_mem_cmd
|
||||
io.dmem.req.bits.typ := ex_reg_mem_type
|
||||
io.dmem.req.bits.phys := Bool(false)
|
||||
}
|
||||
|
@ -13,9 +13,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
|
||||
val ctrl = new ioCtrlDpath().flip
|
||||
val dmem = new ioHellaCache()(conf.dcache)
|
||||
val dtlb = new ioDTLB_CPU_req_bundle().asOutput()
|
||||
val ptw = new IODatapathPTW().flip
|
||||
val imem = new IOCPUFrontend()(conf.icache)
|
||||
val ptbr_wen = Bool(OUTPUT);
|
||||
val ptbr = UFix(OUTPUT, PADDR_BITS);
|
||||
val fpu = new ioDpathFPU();
|
||||
val vec_ctrl = new ioCtrlDpathVec().flip
|
||||
val vec_iface = new ioDpathVecInterface()
|
||||
@ -81,9 +80,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
|
||||
val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix
|
||||
|
||||
// hook up I$
|
||||
io.imem.req.bits.invalidateTLB := pcr.io.ptbr_wen
|
||||
io.imem.req.bits.currentpc := ex_reg_pc
|
||||
io.imem.req.bits.status := pcr.io.status
|
||||
io.imem.req.bits.pc :=
|
||||
Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4,
|
||||
Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target),
|
||||
@ -209,7 +206,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
|
||||
|
||||
// D$ request interface (registered inside D$ module)
|
||||
// other signals (req_val, req_rdy) connect to control module
|
||||
io.dmem.req.bits.idx := ex_effective_address
|
||||
io.dmem.req.bits.addr := ex_effective_address
|
||||
io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2)
|
||||
io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val)
|
||||
require(io.dmem.req.bits.tag.getWidth >= 6)
|
||||
@ -225,8 +222,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
|
||||
io.ctrl.irq_ipi := pcr.io.irq_ipi;
|
||||
io.ctrl.status := pcr.io.status;
|
||||
io.ctrl.pcr_replay := pcr.io.replay
|
||||
io.ptbr := pcr.io.ptbr;
|
||||
io.ptbr_wen := pcr.io.ptbr_wen;
|
||||
|
||||
io.ptw.ptbr := pcr.io.ptbr
|
||||
io.ptw.invalidate := pcr.io.ptbr_wen
|
||||
io.ptw.status := pcr.io.status
|
||||
|
||||
// branch resolution logic
|
||||
io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0)
|
||||
|
@ -5,8 +5,9 @@ import Node._
|
||||
import Constants._
|
||||
import Instructions._
|
||||
import Util._
|
||||
import FPConstants._
|
||||
|
||||
object rocketFPConstants
|
||||
object FPConstants
|
||||
{
|
||||
val FCMD_ADD = Bits("b000000")
|
||||
val FCMD_SUB = Bits("b000001")
|
||||
@ -45,7 +46,6 @@ object rocketFPConstants
|
||||
val FCMD_WIDTH = 6
|
||||
val FSR_WIDTH = 8
|
||||
}
|
||||
import rocketFPConstants._
|
||||
|
||||
class FPUCtrlSigs extends Bundle
|
||||
{
|
||||
@ -64,7 +64,7 @@ class FPUCtrlSigs extends Bundle
|
||||
val wrfsr = Bool()
|
||||
}
|
||||
|
||||
class rocketFPUDecoder extends Component
|
||||
class FPUDecoder extends Component
|
||||
{
|
||||
val io = new Bundle {
|
||||
val inst = Bits(INPUT, 32)
|
||||
@ -378,7 +378,7 @@ class ioFMA(width: Int) extends Bundle {
|
||||
val exc = Bits(OUTPUT, 5)
|
||||
}
|
||||
|
||||
class rocketFPUSFMAPipe(val latency: Int) extends Component
|
||||
class FPUSFMAPipe(val latency: Int) extends Component
|
||||
{
|
||||
val io = new ioFMA(33)
|
||||
|
||||
@ -415,7 +415,7 @@ class rocketFPUSFMAPipe(val latency: Int) extends Component
|
||||
io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits
|
||||
}
|
||||
|
||||
class rocketFPUDFMAPipe(val latency: Int) extends Component
|
||||
class FPUDFMAPipe(val latency: Int) extends Component
|
||||
{
|
||||
val io = new ioFMA(65)
|
||||
|
||||
@ -452,7 +452,7 @@ class rocketFPUDFMAPipe(val latency: Int) extends Component
|
||||
io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits
|
||||
}
|
||||
|
||||
class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component
|
||||
class FPU(sfma_latency: Int, dfma_latency: Int) extends Component
|
||||
{
|
||||
val io = new Bundle {
|
||||
val ctrl = new ioCtrlFPU().flip
|
||||
@ -470,7 +470,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component
|
||||
val killm = io.ctrl.killm || io.ctrl.nack_mem
|
||||
val wb_reg_valid = Reg(mem_reg_valid && !killm, resetVal = Bool(false))
|
||||
|
||||
val fp_decoder = new rocketFPUDecoder
|
||||
val fp_decoder = new FPUDecoder
|
||||
fp_decoder.io.inst := io.dpath.inst
|
||||
|
||||
val ctrl = RegEn(fp_decoder.io.sigs, io.ctrl.valid)
|
||||
@ -530,7 +530,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component
|
||||
val cmd_fma = mem_ctrl.cmd === FCMD_MADD || mem_ctrl.cmd === FCMD_MSUB ||
|
||||
mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB
|
||||
val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB
|
||||
val sfma = new rocketFPUSFMAPipe(sfma_latency)
|
||||
val sfma = new FPUSFMAPipe(sfma_latency)
|
||||
sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single
|
||||
sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1)
|
||||
sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2)
|
||||
@ -540,7 +540,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component
|
||||
io.sfma.out := sfma.io.out
|
||||
io.sfma.exc := sfma.io.exc
|
||||
|
||||
val dfma = new rocketFPUDFMAPipe(dfma_latency)
|
||||
val dfma = new FPUDFMAPipe(dfma_latency)
|
||||
dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single
|
||||
dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1)
|
||||
dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2)
|
||||
|
@ -29,9 +29,7 @@ case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached,
|
||||
|
||||
class FrontendReq extends Bundle {
|
||||
val pc = UFix(width = VADDR_BITS+1)
|
||||
val status = Bits(width = 32)
|
||||
val invalidate = Bool()
|
||||
val invalidateTLB = Bool()
|
||||
val mispredict = Bool()
|
||||
val taken = Bool()
|
||||
val currentpc = UFix(width = VADDR_BITS+1)
|
||||
@ -99,14 +97,13 @@ class Frontend(implicit c: ICacheConfig) extends Component
|
||||
btb.io.clr := !io.cpu.req.bits.taken
|
||||
btb.io.correct_pc := io.cpu.req.bits.currentpc
|
||||
btb.io.correct_target := io.cpu.req.bits.pc
|
||||
btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.req.bits.invalidateTLB
|
||||
btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.ptw.invalidate
|
||||
|
||||
tlb.io.ptw <> io.cpu.ptw
|
||||
tlb.io.req.valid := !stall && !icmiss
|
||||
tlb.io.req.bits.vpn := s1_pc >> UFix(PGIDX_BITS)
|
||||
tlb.io.req.bits.status := io.cpu.req.bits.status
|
||||
tlb.io.req.bits.asid := UFix(0)
|
||||
tlb.io.req.bits.invalidate := io.cpu.req.bits.invalidateTLB
|
||||
tlb.io.req.bits.passthrough := Bool(false)
|
||||
tlb.io.req.bits.instruction := Bool(true)
|
||||
|
||||
icache.io.mem <> io.mem
|
||||
|
@ -3,16 +3,18 @@ package rocket
|
||||
import Chisel._
|
||||
import Constants._
|
||||
import uncore._
|
||||
import Util._
|
||||
|
||||
case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy,
|
||||
nmshr: Int, nsecondary: Int, nsdq: Int,
|
||||
nmshr: Int, nrpq: Int, nsdq: Int,
|
||||
reqtagbits: Int = -1)
|
||||
{
|
||||
require(isPow2(sets))
|
||||
require(isPow2(ways)) // TODO: relax this
|
||||
def lines = sets*ways
|
||||
def dm = ways == 1
|
||||
def ppnbits = PPN_BITS
|
||||
def ppnbits = PADDR_BITS - PGIDX_BITS
|
||||
def vpnbits = VADDR_BITS - PGIDX_BITS
|
||||
def pgidxbits = PGIDX_BITS
|
||||
def offbits = OFFSET_BITS
|
||||
def paddrbits = ppnbits + pgidxbits
|
||||
@ -161,7 +163,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component {
|
||||
val req_sec_val = Bool(INPUT)
|
||||
val req_sec_rdy = Bool(OUTPUT)
|
||||
val req_bits = new MSHRReq().asInput
|
||||
val req_sdq_id = UFix(INPUT, log2Up(NSDQ))
|
||||
val req_sdq_id = UFix(INPUT, log2Up(conf.nsdq))
|
||||
|
||||
val idx_match = Bool(OUTPUT)
|
||||
val idx = Bits(OUTPUT, conf.idxbits)
|
||||
@ -194,7 +196,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component {
|
||||
val idx_match = req.idx === io.req_bits.idx
|
||||
val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits))
|
||||
|
||||
val rpq = (new Queue(NRPQ)) { new RPQEntry }
|
||||
val rpq = (new Queue(conf.nrpq)) { new RPQEntry }
|
||||
rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq
|
||||
rpq.io.enq.bits := io.req_bits
|
||||
rpq.io.enq.bits.sdq_id := io.req_sdq_id
|
||||
@ -312,24 +314,24 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component {
|
||||
val cpu_resp_tag = Bits(OUTPUT, conf.reqtagbits)
|
||||
}
|
||||
|
||||
val sdq_val = Reg(resetVal = Bits(0, NSDQ))
|
||||
val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0))
|
||||
val sdq_val = Reg(resetVal = Bits(0, conf.nsdq))
|
||||
val sdq_alloc_id = PriorityEncoder(~sdq_val(conf.nsdq-1,0))
|
||||
val sdq_rdy = !sdq_val.andR
|
||||
val (req_read, req_write) = cpuCmdToRW(io.req.bits.cmd)
|
||||
val sdq_enq = io.req.valid && io.req.ready && req_write
|
||||
val sdq = Mem(NSDQ) { io.req.bits.data.clone }
|
||||
val sdq = Mem(conf.nsdq) { io.req.bits.data.clone }
|
||||
when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data }
|
||||
|
||||
val idxMatch = Vec(NMSHR) { Bool() }
|
||||
val tagList = Vec(NMSHR) { Bits() }
|
||||
val wbTagList = Vec(NMSHR) { Bits() }
|
||||
val memRespMux = Vec(NMSHR) { new DataArrayReq }
|
||||
val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() }
|
||||
val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit }
|
||||
val mem_finish_arb = (new Arbiter(NMSHR)) { new TransactionFinish }
|
||||
val wb_req_arb = (new Arbiter(NMSHR)) { new WritebackReq }
|
||||
val replay_arb = (new Arbiter(NMSHR)) { new Replay() }
|
||||
val alloc_arb = (new Arbiter(NMSHR)) { Bool() }
|
||||
val idxMatch = Vec(conf.nmshr) { Bool() }
|
||||
val tagList = Vec(conf.nmshr) { Bits() }
|
||||
val wbTagList = Vec(conf.nmshr) { Bits() }
|
||||
val memRespMux = Vec(conf.nmshr) { new DataArrayReq }
|
||||
val meta_req_arb = (new Arbiter(conf.nmshr)) { new MetaArrayReq() }
|
||||
val mem_req_arb = (new Arbiter(conf.nmshr)) { new TransactionInit }
|
||||
val mem_finish_arb = (new Arbiter(conf.nmshr)) { new TransactionFinish }
|
||||
val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq }
|
||||
val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() }
|
||||
val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() }
|
||||
|
||||
val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.tag
|
||||
val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.tag
|
||||
@ -341,7 +343,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component {
|
||||
var writeback_probe_rdy = Bool(true)
|
||||
var refill_probe_rdy = Bool(true)
|
||||
|
||||
for (i <- 0 to NMSHR-1) {
|
||||
for (i <- 0 to conf.nmshr-1) {
|
||||
val mshr = new MSHR(i)
|
||||
|
||||
idxMatch(i) := mshr.io.idx_match
|
||||
@ -400,8 +402,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component {
|
||||
|
||||
val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd)
|
||||
val sdq_free = replay.valid && replay.ready && replay_write
|
||||
sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, NSDQ)) |
|
||||
PriorityEncoderOH(~sdq_val(NSDQ-1,0)) & Fill(NSDQ, sdq_enq && io.req.bits.tag_miss)
|
||||
sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, conf.nsdq)) |
|
||||
PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq && io.req.bits.tag_miss)
|
||||
val sdq_rdata = Reg() { io.req.bits.data.clone }
|
||||
sdq_rdata := sdq(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id))
|
||||
io.data_req.bits.data := sdq_rdata
|
||||
@ -711,8 +713,8 @@ class AMOALU extends Component {
|
||||
class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle {
|
||||
val kill = Bool()
|
||||
val typ = Bits(width = 3)
|
||||
val idx = Bits(width = conf.pgidxbits)
|
||||
val ppn = Bits(width = conf.ppnbits)
|
||||
val phys = Bool()
|
||||
val addr = UFix(width = conf.ppnbits.max(conf.vpnbits+1) + conf.pgidxbits)
|
||||
val data = Bits(width = conf.databits)
|
||||
val tag = Bits(width = conf.reqtagbits)
|
||||
val cmd = Bits(width = 4)
|
||||
@ -739,6 +741,7 @@ class AlignmentExceptions extends Bundle {
|
||||
|
||||
class HellaCacheExceptions extends Bundle {
|
||||
val ma = new AlignmentExceptions
|
||||
val pf = new AlignmentExceptions
|
||||
}
|
||||
|
||||
// interface between D$ and processor/DTLB
|
||||
@ -746,6 +749,7 @@ class ioHellaCache(implicit conf: DCacheConfig) extends Bundle {
|
||||
val req = (new FIFOIO){ new HellaCacheReq }
|
||||
val resp = (new PipeIO){ new HellaCacheResp }.flip
|
||||
val xcpt = (new HellaCacheExceptions).asInput
|
||||
val ptw = new IOTLBPTW().flip
|
||||
}
|
||||
|
||||
class HellaCache(implicit conf: DCacheConfig) extends Component {
|
||||
@ -768,6 +772,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
|
||||
val early_nack = Reg { Bool() }
|
||||
val r_cpu_req_val_ = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false))
|
||||
val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack
|
||||
val r_cpu_req_phys = Reg() { Bool() }
|
||||
val r_cpu_req_vpn = Reg() { UFix() }
|
||||
val r_cpu_req_idx = Reg() { Bits() }
|
||||
val r_cpu_req_cmd = Reg() { Bits() }
|
||||
val r_cpu_req_type = Reg() { Bits() }
|
||||
@ -799,6 +805,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
|
||||
val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch
|
||||
val nack_hit = Bool()
|
||||
|
||||
val dtlb = new TLB(8)
|
||||
dtlb.io.ptw <> io.cpu.ptw
|
||||
dtlb.io.req.valid := r_cpu_req_val_ && r_req_readwrite && !r_cpu_req_phys
|
||||
dtlb.io.req.bits.passthrough := r_cpu_req_phys
|
||||
dtlb.io.req.bits.asid := UFix(0)
|
||||
dtlb.io.req.bits.vpn := r_cpu_req_vpn
|
||||
dtlb.io.req.bits.instruction := Bool(false)
|
||||
|
||||
val wb = new WritebackUnit
|
||||
val prober = new ProbeUnit
|
||||
val mshr = new MSHRFile
|
||||
@ -812,7 +826,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
|
||||
flusher.io.mshr_req.ready := mshr.io.req.ready
|
||||
|
||||
when (io.cpu.req.valid) {
|
||||
r_cpu_req_idx := io.cpu.req.bits.idx
|
||||
r_cpu_req_phys := io.cpu.req.bits.phys
|
||||
r_cpu_req_vpn := io.cpu.req.bits.addr >> taglsb
|
||||
r_cpu_req_idx := io.cpu.req.bits.addr(indexmsb,0)
|
||||
r_cpu_req_cmd := io.cpu.req.bits.cmd
|
||||
r_cpu_req_type := io.cpu.req.bits.typ
|
||||
r_cpu_req_tag := io.cpu.req.bits.tag
|
||||
@ -839,8 +855,10 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
|
||||
(((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) ||
|
||||
((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0)));
|
||||
|
||||
io.cpu.xcpt.ma.ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned
|
||||
io.cpu.xcpt.ma.st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned
|
||||
io.cpu.xcpt.ma.ld := r_cpu_req_val_ && r_req_read && misaligned
|
||||
io.cpu.xcpt.ma.st := r_cpu_req_val_ && r_req_write && misaligned
|
||||
io.cpu.xcpt.pf.ld := r_cpu_req_val_ && r_req_read && dtlb.io.resp.xcpt_ld
|
||||
io.cpu.xcpt.pf.st := r_cpu_req_val_ && r_req_write && dtlb.io.resp.xcpt_st
|
||||
|
||||
// tags
|
||||
val meta = new MetaDataArrayArray(lines)
|
||||
@ -855,11 +873,11 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
|
||||
|
||||
// cpu tag check
|
||||
meta_arb.io.in(3).valid := io.cpu.req.valid
|
||||
meta_arb.io.in(3).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb)
|
||||
meta_arb.io.in(3).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb)
|
||||
meta_arb.io.in(3).bits.rw := Bool(false)
|
||||
meta_arb.io.in(3).bits.way_en := Fix(-1)
|
||||
val early_tag_nack = !meta_arb.io.in(3).ready
|
||||
val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), io.cpu.req.bits.ppn)
|
||||
val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), dtlb.io.resp.ppn)
|
||||
val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb)
|
||||
val tag_match_arr = (0 until conf.ways).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag))
|
||||
val tag_match = Cat(Bits(0),tag_match_arr:_*).orR
|
||||
@ -892,8 +910,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
|
||||
data_arb.io.in(0).valid := io.mem.xact_rep.valid && conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits)
|
||||
|
||||
// load hits
|
||||
data_arb.io.in(4).bits.offset := io.cpu.req.bits.idx(offsetmsb,ramindexlsb)
|
||||
data_arb.io.in(4).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb)
|
||||
data_arb.io.in(4).bits.offset := io.cpu.req.bits.addr(offsetmsb,ramindexlsb)
|
||||
data_arb.io.in(4).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb)
|
||||
data_arb.io.in(4).bits.rw := Bool(false)
|
||||
data_arb.io.in(4).valid := io.cpu.req.valid && req_read
|
||||
data_arb.io.in(4).bits.way_en := Fix(-1) // initiate load on all ways, mux after tag check
|
||||
@ -1015,13 +1033,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
|
||||
val pending_fence = Reg(resetVal = Bool(false))
|
||||
pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !mshr.io.fence_rdy
|
||||
nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy ||
|
||||
p_store_idx_match && meta.io.state_req.valid
|
||||
p_store_idx_match && meta.io.state_req.valid ||
|
||||
!r_cpu_req_phys && dtlb.io.resp.miss
|
||||
val nack_miss = !mshr.io.req.ready
|
||||
val nack_flush = !mshr.io.fence_rdy && (r_req_fence || r_req_flush) ||
|
||||
!flushed && r_req_flush
|
||||
val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush
|
||||
|
||||
io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence
|
||||
io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence && (dtlb.io.req.ready || io.cpu.req.bits.phys)
|
||||
io.cpu.resp.valid := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val
|
||||
io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack
|
||||
io.cpu.resp.bits.replay := mshr.io.cpu_resp_val
|
||||
|
@ -5,16 +5,31 @@ import Node._
|
||||
import Constants._
|
||||
import scala.math._
|
||||
|
||||
class ioPTW(n: Int)(implicit conf: RocketConfiguration) extends Bundle
|
||||
{
|
||||
val requestor = Vec(n) { new IOTLBPTW }.flip
|
||||
val mem = new ioHellaCache()(conf.dcache)
|
||||
val ptbr = UFix(INPUT, PADDR_BITS)
|
||||
// Interface bundle between a TLB and the page-table walker (PTW).
// It groups a request channel, a flipped response channel and two INPUT
// control signals. Elsewhere in this change the PTW holds a flipped Vec of
// these as `io.requestor`, and the TLB reaches it through `io.ptw`.
class IOTLBPTW extends Bundle {
|
||||
// Request channel: FIFOIO whose payload is a VPN_BITS-wide UFix
// (presumably the virtual page number to translate -- confirm against the TLB).
val req = new FIFOIO()(UFix(width = VPN_BITS))
|
||||
// Response channel: PipeIO, flipped relative to `req`.
val resp = new PipeIO()(new Bundle {
|
||||
// Error flag for the response.
val error = Bool()
|
||||
// PPN_BITS-wide result; the TLB stores this value into its tag_ram on refill.
val ppn = UFix(width = PPN_BITS)
|
||||
// PERM_BITS-wide permission field accompanying the ppn.
val perm = Bits(width = PERM_BITS)
|
||||
}).flip
|
||||
|
||||
// 32-bit status word; the TLB reads its SR_S (user/supervisor mode) and
// SR_VM (virtual memory enable) bits from here.
val status = Bits(INPUT, width = 32)
|
||||
// Invalidate request; the TLB wires this to its tag CAM's `clear` input.
val invalidate = Bool(INPUT)
|
||||
}
|
||||
|
||||
// Bundle of INPUT signals that the PTW exposes as `io.dpath`
// (presumably driven by the datapath, as the name suggests -- confirm at the instantiation site).
class IODatapathPTW extends Bundle {
|
||||
// PADDR_BITS-wide value (presumably the page-table base register); the PTW uses
// bits PADDR_BITS-1..PGIDX_BITS as the upper part of its first request address.
val ptbr = UFix(INPUT, PADDR_BITS)
|
||||
// Forwarded unchanged by the PTW to every requestor's `invalidate` input.
val invalidate = Bool(INPUT)
|
||||
// 32-bit status word, forwarded unchanged by the PTW to every requestor's `status` input.
val status = Bits(INPUT, 32)
|
||||
}
|
||||
|
||||
class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component
|
||||
{
|
||||
val io = new ioPTW(n)
|
||||
val io = new Bundle {
|
||||
val requestor = Vec(n) { new IOTLBPTW }.flip
|
||||
val mem = new ioHellaCache()(conf.dcache)
|
||||
val dpath = new IODatapathPTW
|
||||
}
|
||||
|
||||
val levels = 3
|
||||
val bitsPerLevel = VPN_BITS/levels
|
||||
@ -27,7 +42,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component
|
||||
val r_req_vpn = Reg() { Bits() }
|
||||
val r_req_dest = Reg() { Bits() }
|
||||
|
||||
val req_addr = Reg() { Bits() }
|
||||
val req_addr = Reg() { UFix() }
|
||||
val r_resp_ppn = Reg() { Bits() };
|
||||
val r_resp_perm = Reg() { Bits() };
|
||||
|
||||
@ -41,21 +56,21 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component
|
||||
when (arb.io.out.fire()) {
|
||||
r_req_vpn := arb.io.out.bits
|
||||
r_req_dest := arb.io.chosen
|
||||
req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3))
|
||||
req_addr := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), UFix(0,3))
|
||||
}
|
||||
|
||||
val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false))
|
||||
when (dmem_resp_val) {
|
||||
req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3))
|
||||
req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, UFix(0,3)).toUFix
|
||||
r_resp_perm := io.mem.resp.bits.data_subword(9,4);
|
||||
r_resp_ppn := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS);
|
||||
}
|
||||
|
||||
io.mem.req.valid := state === s_req
|
||||
io.mem.req.bits.phys := Bool(true)
|
||||
io.mem.req.bits.cmd := M_XRD
|
||||
io.mem.req.bits.typ := MT_D
|
||||
io.mem.req.bits.idx := req_addr(PGIDX_BITS-1,0)
|
||||
io.mem.req.bits.ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS))
|
||||
io.mem.req.bits.addr := req_addr
|
||||
io.mem.req.bits.kill := Bool(false)
|
||||
|
||||
val resp_val = state === s_done || state === s_error
|
||||
@ -73,6 +88,8 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component
|
||||
io.requestor(i).resp.bits.error := resp_err
|
||||
io.requestor(i).resp.bits.perm := r_resp_perm
|
||||
io.requestor(i).resp.bits.ppn := resp_ppn.toUFix
|
||||
io.requestor(i).invalidate := io.dpath.invalidate
|
||||
io.requestor(i).status := io.dpath.status
|
||||
}
|
||||
|
||||
// control state machine
|
||||
|
@ -13,7 +13,8 @@ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached,
|
||||
|
||||
class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal)
|
||||
{
|
||||
implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(DMEM_PORTS))
|
||||
val memPorts = if (HAVE_VEC) 3 else 2
|
||||
implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts))
|
||||
implicit val conf = confIn.copy(dcache = dcConf)
|
||||
|
||||
val io = new Bundle {
|
||||
@ -21,13 +22,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon
|
||||
val host = new ioHTIF(conf.ntiles)
|
||||
}
|
||||
|
||||
val cpu = new rocketProc
|
||||
val core = new Core
|
||||
val icache = new Frontend()(confIn.icache)
|
||||
val dcache = new HellaCache
|
||||
|
||||
val arbiter = new MemArbiter(DMEM_PORTS)
|
||||
arbiter.io.requestor(DMEM_DCACHE) <> dcache.io.mem
|
||||
arbiter.io.requestor(DMEM_ICACHE) <> icache.io.mem
|
||||
val arbiter = new MemArbiter(memPorts)
|
||||
arbiter.io.requestor(0) <> dcache.io.mem
|
||||
arbiter.io.requestor(1) <> icache.io.mem
|
||||
|
||||
io.tilelink.xact_init <> arbiter.io.mem.xact_init
|
||||
io.tilelink.xact_init_data <> dcache.io.mem.xact_init_data
|
||||
@ -38,14 +39,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon
|
||||
io.tilelink.probe_rep <> dcache.io.mem.probe_rep
|
||||
io.tilelink.probe_rep_data <> dcache.io.mem.probe_rep_data
|
||||
|
||||
if (HAVE_VEC)
|
||||
{
|
||||
if (HAVE_VEC) {
|
||||
val vicache = new Frontend()(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB)
|
||||
arbiter.io.requestor(DMEM_VICACHE) <> vicache.io.mem
|
||||
cpu.io.vimem <> vicache.io.cpu
|
||||
arbiter.io.requestor(2) <> vicache.io.mem
|
||||
core.io.vimem <> vicache.io.cpu
|
||||
}
|
||||
|
||||
cpu.io.host <> io.host
|
||||
cpu.io.imem <> icache.io.cpu
|
||||
cpu.io.dmem <> dcache.io.cpu
|
||||
core.io.host <> io.host
|
||||
core.io.imem <> icache.io.cpu
|
||||
core.io.dmem <> dcache.io.cpu
|
||||
}
|
||||
|
@ -66,21 +66,11 @@ class PseudoLRU(n: Int)
|
||||
}
|
||||
}
|
||||
|
||||
class IOTLBPTW extends Bundle {
|
||||
val req = new FIFOIO()(UFix(width = VPN_BITS))
|
||||
val resp = new PipeIO()(new Bundle {
|
||||
val error = Bool()
|
||||
val ppn = UFix(width = PPN_BITS)
|
||||
val perm = Bits(width = PERM_BITS)
|
||||
}).flip
|
||||
}
|
||||
|
||||
class TLBReq extends Bundle
|
||||
{
|
||||
val asid = UFix(width = ASID_BITS)
|
||||
val vpn = UFix(width = VPN_BITS+1)
|
||||
val status = Bits(width = 32)
|
||||
val invalidate = Bool()
|
||||
val passthrough = Bool()
|
||||
val instruction = Bool()
|
||||
}
|
||||
|
||||
@ -116,7 +106,7 @@ class TLB(entries: Int) extends Component
|
||||
when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn }
|
||||
|
||||
val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUFix
|
||||
tag_cam.io.clear := io.req.bits.invalidate
|
||||
tag_cam.io.clear := io.ptw.invalidate
|
||||
tag_cam.io.clear_hit := io.req.fire() && Mux(io.req.bits.instruction, io.resp.xcpt_if, io.resp.xcpt_ld && io.resp.xcpt_st)
|
||||
tag_cam.io.tag := lookup_tag
|
||||
tag_cam.io.write := state === s_wait && io.ptw.resp.valid
|
||||
@ -148,8 +138,8 @@ class TLB(entries: Int) extends Component
|
||||
val plru = new PseudoLRU(entries)
|
||||
val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace)
|
||||
|
||||
val status_s = io.req.bits.status(SR_S) // user/supervisor mode
|
||||
val status_vm = io.req.bits.status(SR_VM) // virtual memory enable
|
||||
val status_s = io.ptw.status(SR_S) // user/supervisor mode
|
||||
val status_vm = io.ptw.status(SR_VM) // virtual memory enable
|
||||
val bad_va = io.req.bits.vpn(VPN_BITS) != io.req.bits.vpn(VPN_BITS-1)
|
||||
val tlb_hit = status_vm && tag_hit
|
||||
val tlb_miss = status_vm && !tag_hit && !bad_va
|
||||
@ -163,7 +153,7 @@ class TLB(entries: Int) extends Component
|
||||
io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, sw_array(tag_hit_addr), uw_array(tag_hit_addr))
|
||||
io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, sx_array(tag_hit_addr), ux_array(tag_hit_addr))
|
||||
io.resp.miss := tlb_miss
|
||||
io.resp.ppn := Mux(status_vm, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0))
|
||||
io.resp.ppn := Mux(status_vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0))
|
||||
io.resp.hit_idx := tag_cam.io.hits
|
||||
|
||||
io.ptw.req.valid := state === s_request
|
||||
@ -175,15 +165,15 @@ class TLB(entries: Int) extends Component
|
||||
r_refill_waddr := repl_waddr
|
||||
}
|
||||
when (state === s_request) {
|
||||
when (io.req.bits.invalidate) {
|
||||
when (io.ptw.invalidate) {
|
||||
state := s_ready
|
||||
}
|
||||
when (io.ptw.req.ready) {
|
||||
state := s_wait
|
||||
when (io.req.bits.invalidate) { state := s_wait_invalidate }
|
||||
when (io.ptw.invalidate) { state := s_wait_invalidate }
|
||||
}
|
||||
}
|
||||
when (state === s_wait && io.req.bits.invalidate) {
|
||||
when (state === s_wait && io.ptw.invalidate) {
|
||||
state := s_wait_invalidate
|
||||
}
|
||||
when ((state === s_wait || state === s_wait_invalidate) && io.ptw.resp.valid) {
|
||||
@ -204,10 +194,6 @@ class ioDTLB_CPU_resp extends TLBResp(1)
|
||||
|
||||
class ioDTLB extends Bundle
|
||||
{
|
||||
// status bits (from PCR), to check current permission and whether VM is enabled
|
||||
val status = Bits(INPUT, 32)
|
||||
// invalidate all TLB entries
|
||||
val invalidate = Bool(INPUT)
|
||||
val cpu_req = new ioDTLB_CPU_req().flip
|
||||
val cpu_resp = new ioDTLB_CPU_resp()
|
||||
val ptw = new IOTLBPTW
|
||||
@ -225,8 +211,7 @@ class rocketTLB(entries: Int) extends Component
|
||||
val tlb = new TLB(entries)
|
||||
tlb.io.req.valid := r_cpu_req_val && !io.cpu_req.bits.kill
|
||||
tlb.io.req.bits.instruction := Bool(false)
|
||||
tlb.io.req.bits.invalidate := io.invalidate
|
||||
tlb.io.req.bits.status := io.status
|
||||
tlb.io.req.bits.passthrough := Bool(false)
|
||||
tlb.io.req.bits.vpn := r_cpu_req_vpn
|
||||
tlb.io.req.bits.asid := r_cpu_req_asid
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user