remove more global consts; refactor DTLBs

D$ now contains DTLB. provide full VAddr with initial request.
VU now has its own DTLBs.
Andrew Waterman 2012-11-06 08:13:44 -08:00
parent e76892f758
commit 4d1ca8ba3a
11 changed files with 206 additions and 281 deletions
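
To make the commit message concrete: below is a minimal, hypothetical requestor sketched against the reworked D$ port. The signal names (addr, phys, kill, xcpt.pf) are taken from the diff that follows; the module itself, its name, and its extra ports are illustrative and not part of this commit. The requestor now hands the cache a full virtual address plus a phys bit, and the cache's internal DTLB performs translation and reports page faults.

import Chisel._
import Constants._

// Hypothetical client of the new ioHellaCache interface: it supplies the full
// VAddr and a phys bit with the initial request; the D$'s internal DTLB
// translates it and raises page faults through xcpt.pf.*.
class DmemClientSketch(implicit conf: DCacheConfig) extends Component {
  val io = new Bundle {
    val dmem = new ioHellaCache()(conf)
    val vaddr = UFix(INPUT, VADDR_BITS+1)
    val valid = Bool(INPUT)
    val fault = Bool(OUTPUT)
  }
  io.dmem.req.valid := io.valid
  io.dmem.req.bits.addr := io.vaddr        // full VAddr, no pre-translated ppn/idx split
  io.dmem.req.bits.phys := Bool(false)     // Bool(true) would bypass the DTLB, as the PTW does
  io.dmem.req.bits.cmd := M_XRD
  io.dmem.req.bits.typ := MT_D
  io.dmem.req.bits.kill := Bool(false)
  io.dmem.req.bits.tag := UFix(0)
  io.dmem.req.bits.data := Bits(0)
  io.fault := io.dmem.xcpt.pf.ld           // page faults now come from the D$ itself
}

In the surrounding design the cache's ptw sub-port is forwarded to the shared page-table walker (see the ptw ArrayBuffer and the dmem(0) <> thePTW.io.mem wiring in the core diff below), so this sketch leaves that connection to the enclosing module.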


@@ -12,58 +12,40 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp
     val mem = new ioHellaCache()(conf.dcache)
   }
 
-  var req_val = Bool(false)
-  var req_rdy = io.mem.req.ready
-  for (i <- 0 until n)
-  {
-    io.requestor(i).req.ready := req_rdy
-    req_val = req_val || io.requestor(i).req.valid
-    req_rdy = req_rdy && !io.requestor(i).req.valid
-  }
-
-  var req_cmd = io.requestor(n-1).req.bits.cmd
-  var req_type = io.requestor(n-1).req.bits.typ
-  var req_idx = io.requestor(n-1).req.bits.idx
-  var req_ppn = io.requestor(n-1).req.bits.ppn
-  var req_data = io.requestor(n-1).req.bits.data
-  var req_kill = io.requestor(n-1).req.bits.kill
-  var req_tag = io.requestor(n-1).req.bits.tag
-  for (i <- n-1 to 0 by -1)
-  {
-    val r = io.requestor(i).req
-    req_cmd = Mux(r.valid, r.bits.cmd, req_cmd)
-    req_type = Mux(r.valid, r.bits.typ, req_type)
-    req_idx = Mux(r.valid, r.bits.idx, req_idx)
-    req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn)
-    req_data = Mux(Reg(r.valid), r.bits.data, req_data)
-    req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill)
-    req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2Up(n))), req_tag)
-  }
-
-  io.mem.req.valid := req_val
-  io.mem.req.bits.cmd := req_cmd
-  io.mem.req.bits.typ := req_type
-  io.mem.req.bits.idx := req_idx
-  io.mem.req.bits.ppn := req_ppn
-  io.mem.req.bits.data := req_data
-  io.mem.req.bits.kill := req_kill
-  io.mem.req.bits.tag := req_tag
-
-  for (i <- 0 until n)
-  {
-    val r = io.requestor(i).resp
-    val x = io.requestor(i).xcpt
+  val r_valid = io.requestor.map(r => Reg(r.req.valid))
+
+  io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_)
+  io.requestor(0).req.ready := io.mem.req.ready
+  for (i <- 1 until n)
+    io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid
+
+  io.mem.req.bits := io.requestor(n-1).req.bits
+  io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UFix(n-1, log2Up(n)))
+  for (i <- n-2 to 0 by -1) {
+    val req = io.requestor(i).req
+    when (req.valid) {
+      io.mem.req.bits.cmd := req.bits.cmd
+      io.mem.req.bits.typ := req.bits.typ
+      io.mem.req.bits.addr := req.bits.addr
+      io.mem.req.bits.phys := req.bits.phys
+      io.mem.req.bits.tag := Cat(req.bits.tag, UFix(i, log2Up(n)))
+    }
+    when (r_valid(i)) {
+      io.mem.req.bits.kill := req.bits.kill
+      io.mem.req.bits.data := req.bits.data
+    }
+  }
+
+  for (i <- 0 until n) {
+    val resp = io.requestor(i).resp
     val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i)
-    x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid)
-    x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid)
-    r.valid := io.mem.resp.valid && tag_hit
-    r.bits.miss := io.mem.resp.bits.miss && tag_hit
-    r.bits.nack := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid)
-    r.bits.replay := io.mem.resp.bits.replay && tag_hit
-    r.bits.data := io.mem.resp.bits.data
-    r.bits.data_subword := io.mem.resp.bits.data_subword
-    r.bits.typ := io.mem.resp.bits.typ
-    r.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n))
+    resp.valid := io.mem.resp.valid && tag_hit
+    io.requestor(i).xcpt := io.mem.xcpt
+    resp.bits := io.mem.resp.bits
+    resp.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n))
+    resp.bits.miss := io.mem.resp.bits.miss && tag_hit
+    resp.bits.nack := io.mem.resp.bits.nack && r_valid(i)
+    resp.bits.replay := io.mem.resp.bits.replay && tag_hit
   }
 }

@@ -80,15 +62,6 @@ class MemArbiter(n: Int) extends Component
     val requestor = Vec(n) { new ioUncachedRequestor }.flip
   }
 
-  var xi_val = Bool(false)
-  var xi_rdy = io.mem.xact_init.ready
-  for (i <- 0 until n)
-  {
-    io.requestor(i).xact_init.ready := xi_rdy
-    xi_val = xi_val || io.requestor(i).xact_init.valid
-    xi_rdy = xi_rdy && !io.requestor(i).xact_init.valid
-  }
-
   var xi_bits = new TransactionInit
   xi_bits := io.requestor(n-1).xact_init.bits
   xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2Up(n)))

@@ -101,24 +74,21 @@ class MemArbiter(n: Int) extends Component
     xi_bits = Mux(io.requestor(i).xact_init.valid, my_xi_bits, xi_bits)
   }
 
-  io.mem.xact_init.valid := xi_val
   io.mem.xact_init.bits := xi_bits
+  io.mem.xact_init.valid := io.requestor.map(_.xact_init.valid).reduce(_||_)
+  io.requestor(0).xact_init.ready := io.mem.xact_init.ready
+  for (i <- 1 until n)
+    io.requestor(i).xact_init.ready := io.requestor(i-1).xact_init.ready && !io.requestor(i-1).xact_init.valid
 
-  var xf_val = Bool(false)
-  var xf_rdy = io.mem.xact_finish.ready
-  for (i <- 0 until n)
-  {
-    io.requestor(i).xact_finish.ready := xf_rdy
-    xf_val = xf_val || io.requestor(i).xact_finish.valid
-    xf_rdy = xf_rdy && !io.requestor(i).xact_finish.valid
-  }
-
   var xf_bits = io.requestor(n-1).xact_finish.bits
   for (i <- n-2 to 0 by -1)
     xf_bits = Mux(io.requestor(i).xact_finish.valid, io.requestor(i).xact_finish.bits, xf_bits)
 
-  io.mem.xact_finish.valid := xf_val
   io.mem.xact_finish.bits := xf_bits
+  io.mem.xact_finish.valid := io.requestor.map(_.xact_finish.valid).reduce(_||_)
+  io.requestor(0).xact_finish.ready := io.mem.xact_finish.ready
+  for (i <- 1 until n)
+    io.requestor(i).xact_finish.ready := io.requestor(i-1).xact_finish.ready && !io.requestor(i-1).xact_finish.valid
 
   for (i <- 0 until n)
   {


@@ -154,12 +154,7 @@ trait InterruptConstants {
   val IRQ_TIMER = 7
 }
 
-abstract trait RocketDcacheConstants extends ArbiterConstants with uncore.constants.AddressConstants {
-  val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses
-  require(log2Up(NMSHR)+3 <= uncore.Constants.TILE_XACT_ID_BITS)
-  val NRPQ = 16; // number of secondary misses
-  val NSDQ = 17; // number of secondary stores/AMOs
-  val OFFSET_BITS = 6; // log2(cache line size in bytes)
+abstract trait RocketDcacheConstants extends TileConfigConstants with uncore.constants.CacheConstants with uncore.constants.AddressConstants {
   require(OFFSET_BITS == log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES))
   require(OFFSET_BITS <= uncore.Constants.X_INIT_WRITE_MASK_BITS)
   require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS)

@@ -196,15 +191,3 @@ trait VectorOpConstants {
   val VIMM2_ALU = UFix(1, 1)
   val VIMM2_X = UFix(0, 1)
 }
-
-abstract trait ArbiterConstants extends TileConfigConstants {
-  val DCACHE_PORTS = 3
-  val DCACHE_CPU = 0
-  val DCACHE_PTW = 1
-  val DCACHE_VU = 2
-
-  val DMEM_PORTS = if (HAVE_VEC) 3 else 2
-  val DMEM_DCACHE = 0
-  val DMEM_ICACHE = 1
-  val DMEM_VICACHE = 2
-}


@@ -13,87 +13,57 @@ class ioRocket(implicit conf: RocketConfiguration) extends Bundle
   val dmem = new ioHellaCache()(conf.dcache)
 }
 
-class rocketProc(implicit conf: RocketConfiguration) extends Component
+class Core(implicit conf: RocketConfiguration) extends Component
 {
   val io = new ioRocket
 
   val ctrl = new Control
   val dpath = new Datapath
 
-  val ptw = Vec(0) { new IOTLBPTW }
-  val arb = new HellaCacheArbiter(DCACHE_PORTS)
-
-  var vu: vu = null
-  if (HAVE_VEC)
-  {
-    vu = new vu()
-
-    val vdtlb = new rocketTLB(8)
-    vdtlb.io.invalidate := dpath.io.ptbr_wen
-    vdtlb.io.status := dpath.io.ctrl.status
-    ptw += vdtlb.io.ptw
-
-    vdtlb.io.cpu_req <> vu.io.vec_tlb_req
-    vu.io.vec_tlb_resp := vdtlb.io.cpu_resp
-    vu.io.vec_tlb_resp.xcpt_pf := Bool(false)
-
-    val pftlb = new rocketTLB(2)
-    pftlb.io.invalidate := dpath.io.ptbr_wen
-    pftlb.io.status := dpath.io.ctrl.status
-    pftlb.io.cpu_req <> vu.io.vec_pftlb_req
-    ptw += pftlb.io.ptw
-    vu.io.vec_pftlb_resp := pftlb.io.cpu_resp
-    vu.io.vec_pftlb_resp.xcpt_ld := Bool(false)
-    vu.io.vec_pftlb_resp.xcpt_st := Bool(false)
-  }
-
-  // connect DTLB to ctrl+dpath
-  val dtlb = new rocketTLB(DTLB_ENTRIES)
-  dtlb.io.invalidate := dpath.io.ptbr_wen
-  dtlb.io.status := dpath.io.ctrl.status
-  ptw += dtlb.io.ptw
-
-  dtlb.io.cpu_req.valid := ctrl.io.dtlb_val
-  dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill
-  dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd
-  dtlb.io.cpu_req.bits.asid := UFix(0)
-  dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn
-  ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld
-  ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st
-  ctrl.io.dtlb_rdy := dtlb.io.cpu_req.ready
-  ctrl.io.dtlb_miss := dtlb.io.cpu_resp.miss
-  arb.io.requestor(DCACHE_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn
-
   ctrl.io.dpath <> dpath.io.ctrl
   dpath.io.host <> io.host
 
   ctrl.io.imem <> io.imem
   dpath.io.imem <> io.imem
 
-  ctrl.io.dmem <> arb.io.requestor(DCACHE_CPU)
-  dpath.io.dmem <> arb.io.requestor(DCACHE_CPU)
+  val dmemArb = new HellaCacheArbiter(if (HAVE_VEC) 3 else 2)
+  dmemArb.io.mem <> io.dmem
+  val dmem = dmemArb.io.requestor
+  dmem(1) <> ctrl.io.dmem
+  dmem(1) <> dpath.io.dmem
 
-  var fpu: rocketFPU = null
-  if (HAVE_FPU)
-  {
-    fpu = new rocketFPU(4,6)
+  val ptw = collection.mutable.ArrayBuffer(io.imem.ptw, io.dmem.ptw)
+
+  val fpu: FPU = if (HAVE_FPU) {
+    val fpu = new FPU(4,6)
     dpath.io.fpu <> fpu.io.dpath
     ctrl.io.fpu <> fpu.io.ctrl
-  }
+    fpu
+  } else null
 
-  if (HAVE_VEC)
-  {
+  if (HAVE_VEC) {
+    val vu = new vu()
+
+    val vdtlb = new rocketTLB(8)
+    ptw += vdtlb.io.ptw
+
+    vdtlb.io.cpu_req <> vu.io.vec_tlb_req
+    vu.io.vec_tlb_resp := vdtlb.io.cpu_resp
+    vu.io.vec_tlb_resp.xcpt_pf := Bool(false)
+
+    val pftlb = new rocketTLB(2)
+    pftlb.io.cpu_req <> vu.io.vec_pftlb_req
+    ptw += pftlb.io.ptw
+    vu.io.vec_pftlb_resp := pftlb.io.cpu_resp
+    vu.io.vec_pftlb_resp.xcpt_ld := Bool(false)
+    vu.io.vec_pftlb_resp.xcpt_st := Bool(false)
+
     dpath.io.vec_ctrl <> ctrl.io.vec_dpath
 
     // hooking up vector I$
     ptw += io.vimem.ptw
-    io.vimem.req.bits.status := dpath.io.ctrl.status
     io.vimem.req.bits.pc := vu.io.imem_req.bits
     io.vimem.req.valid := vu.io.imem_req.valid
     io.vimem.req.bits.invalidate := ctrl.io.dpath.flush_inst
-    io.vimem.req.bits.invalidateTLB := dpath.io.ptbr_wen
     vu.io.imem_resp.valid := io.vimem.resp.valid
     vu.io.imem_resp.bits.pc := io.vimem.resp.bits.pc
     vu.io.imem_resp.bits.data := io.vimem.resp.bits.data

@@ -155,21 +125,16 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component
     vu.io.xcpt.hold := ctrl.io.vec_iface.hold
 
     // hooking up vector memory interface
-    arb.io.requestor(DCACHE_VU).req.valid := vu.io.dmem_req.valid
-    arb.io.requestor(DCACHE_VU).req.bits.kill := vu.io.dmem_req.bits.kill
-    arb.io.requestor(DCACHE_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd
-    arb.io.requestor(DCACHE_VU).req.bits.typ := vu.io.dmem_req.bits.typ
-    arb.io.requestor(DCACHE_VU).req.bits.idx := vu.io.dmem_req.bits.idx
-    arb.io.requestor(DCACHE_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn)
-    arb.io.requestor(DCACHE_VU).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data)
-    arb.io.requestor(DCACHE_VU).req.bits.tag := vu.io.dmem_req.bits.tag
+    dmem(2).req.valid := vu.io.dmem_req.valid
+    dmem(2).req.bits := vu.io.dmem_req.bits
+    dmem(2).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data)
 
-    vu.io.dmem_req.ready := arb.io.requestor(DCACHE_VU).req.ready
-    vu.io.dmem_resp.valid := Reg(arb.io.requestor(DCACHE_VU).resp.valid)
-    vu.io.dmem_resp.bits.nack := arb.io.requestor(DCACHE_VU).resp.bits.nack
-    vu.io.dmem_resp.bits.data := arb.io.requestor(DCACHE_VU).resp.bits.data_subword
-    vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DCACHE_VU).resp.bits.tag)
-    vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DCACHE_VU).resp.bits.typ)
+    vu.io.dmem_req.ready := dmem(2).req.ready
+    vu.io.dmem_resp.valid := Reg(dmem(2).resp.valid)
+    vu.io.dmem_resp.bits.nack := dmem(2).resp.bits.nack
+    vu.io.dmem_resp.bits.data := dmem(2).resp.bits.data_subword
+    vu.io.dmem_resp.bits.tag := Reg(dmem(2).resp.bits.tag)
+    vu.io.dmem_resp.bits.typ := Reg(dmem(2).resp.bits.typ)
 
     // share vector integer multiplier with rocket
     dpath.io.vec_imul_req <> vu.io.cp_imul_req

@@ -178,22 +143,13 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component
     // share sfma and dfma pipelines with rocket
     fpu.io.sfma <> vu.io.cp_sfma
     fpu.io.dfma <> vu.io.cp_dfma
-  }
-  else
-  {
-    arb.io.requestor(DCACHE_VU).req.valid := Bool(false)
-    if (HAVE_FPU)
-    {
-      fpu.io.sfma.valid := Bool(false)
-      fpu.io.dfma.valid := Bool(false)
-    }
+  } else if (fpu != null) {
+    fpu.io.sfma.valid := Bool(false)
+    fpu.io.dfma.valid := Bool(false)
   }
 
-  ptw += io.imem.ptw
-
   val thePTW = new PTW(ptw.length)
-  thePTW.io.requestor <> ptw
-  thePTW.io.ptbr := dpath.io.ptbr;
-  arb.io.requestor(DCACHE_PTW) <> thePTW.io.mem
-
-  arb.io.mem <> io.dmem
+  ptw zip thePTW.io.requestor map { case (a, b) => a <> b }
+  thePTW.io.dpath <> dpath.io.ptw
+  dmem(0) <> thePTW.io.mem
 }


@@ -569,8 +569,8 @@ class Control(implicit conf: RocketConfiguration) extends Component
     (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause),
     (mem_reg_mem_val && io.dmem.xcpt.ma.ld, UFix( 8)),
     (mem_reg_mem_val && io.dmem.xcpt.ma.st, UFix( 9)),
-    (mem_reg_mem_val && io.xcpt_dtlb_ld, UFix(10)),
-    (mem_reg_mem_val && io.xcpt_dtlb_st, UFix(11))))
+    (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UFix(10)),
+    (mem_reg_mem_val && io.dmem.xcpt.pf.st, UFix(11))))
 
   wb_reg_xcpt := mem_xcpt && !take_pc_wb && !wb_reg_replay_next
   when (mem_xcpt) { wb_reg_cause := mem_cause }

@@ -644,7 +644,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
   // replay inst in ex stage
   val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst ||
-    ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) ||
+    ex_reg_mem_val && !io.dmem.req.ready ||
     ex_reg_div_val && !io.dpath.div_rdy ||
     ex_reg_mul_val && !io.dpath.mul_rdy ||
     mem_reg_replay_next

@@ -652,7 +652,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
   // replay inst in mem stage
   val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val
-  val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack)
+  val dmem_kill_mem = mem_reg_valid && io.dmem.resp.bits.nack
   val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem
   val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem
   val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_reg_xcpt || !mem_reg_valid

@@ -734,7 +734,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
     id_ex_hazard || id_mem_hazard || id_wb_hazard ||
     id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr ||
     id_fp_val && id_stall_fpu ||
-    id_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) ||
+    id_mem_val && !io.dmem.req.ready ||
     vec_stalld
   ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || id_interrupt

@@ -772,10 +772,9 @@ class Control(implicit conf: RocketConfiguration) extends Component
   io.fpu.killx := ctrl_killx
   io.fpu.killm := killm_common
 
-  io.dtlb_val := ex_reg_mem_val
-  io.dtlb_kill := !mem_reg_valid
   io.dmem.req.valid := ex_reg_mem_val
-  io.dmem.req.bits.kill := killm_common || mem_xcpt || io.dtlb_miss
+  io.dmem.req.bits.kill := killm_common || mem_xcpt
   io.dmem.req.bits.cmd := ex_reg_mem_cmd
   io.dmem.req.bits.typ := ex_reg_mem_type
+  io.dmem.req.bits.phys := Bool(false)
 }


@@ -13,9 +13,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
     val ctrl = new ioCtrlDpath().flip
     val dmem = new ioHellaCache()(conf.dcache)
     val dtlb = new ioDTLB_CPU_req_bundle().asOutput()
+    val ptw = new IODatapathPTW().flip
     val imem = new IOCPUFrontend()(conf.icache)
-    val ptbr_wen = Bool(OUTPUT);
-    val ptbr = UFix(OUTPUT, PADDR_BITS);
     val fpu = new ioDpathFPU();
     val vec_ctrl = new ioCtrlDpathVec().flip
     val vec_iface = new ioDpathVecInterface()

@@ -81,9 +80,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
   val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix
 
   // hook up I$
-  io.imem.req.bits.invalidateTLB := pcr.io.ptbr_wen
   io.imem.req.bits.currentpc := ex_reg_pc
-  io.imem.req.bits.status := pcr.io.status
   io.imem.req.bits.pc :=
     Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4,
     Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target),

@@ -209,7 +206,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
   // D$ request interface (registered inside D$ module)
   // other signals (req_val, req_rdy) connect to control module
-  io.dmem.req.bits.idx := ex_effective_address
+  io.dmem.req.bits.addr := ex_effective_address
   io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2)
   io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val)
   require(io.dmem.req.bits.tag.getWidth >= 6)

@@ -225,8 +222,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
   io.ctrl.irq_ipi := pcr.io.irq_ipi;
   io.ctrl.status := pcr.io.status;
   io.ctrl.pcr_replay := pcr.io.replay
-  io.ptbr := pcr.io.ptbr;
-  io.ptbr_wen := pcr.io.ptbr_wen;
+
+  io.ptw.ptbr := pcr.io.ptbr
+  io.ptw.invalidate := pcr.io.ptbr_wen
+  io.ptw.status := pcr.io.status
 
   // branch resolution logic
   io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0)


@@ -5,8 +5,9 @@ import Node._
 import Constants._
 import Instructions._
 import Util._
+import FPConstants._
 
-object rocketFPConstants
+object FPConstants
 {
   val FCMD_ADD = Bits("b000000")
   val FCMD_SUB = Bits("b000001")

@@ -45,7 +46,6 @@ object rocketFPConstants
   val FCMD_WIDTH = 6
   val FSR_WIDTH = 8
 }
-import rocketFPConstants._
 
 class FPUCtrlSigs extends Bundle
 {

@@ -64,7 +64,7 @@ class FPUCtrlSigs extends Bundle
   val wrfsr = Bool()
 }
 
-class rocketFPUDecoder extends Component
+class FPUDecoder extends Component
 {
   val io = new Bundle {
     val inst = Bits(INPUT, 32)

@@ -378,7 +378,7 @@ class ioFMA(width: Int) extends Bundle {
   val exc = Bits(OUTPUT, 5)
 }
 
-class rocketFPUSFMAPipe(val latency: Int) extends Component
+class FPUSFMAPipe(val latency: Int) extends Component
 {
   val io = new ioFMA(33)

@@ -415,7 +415,7 @@ class rocketFPUSFMAPipe(val latency: Int) extends Component
   io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits
 }
 
-class rocketFPUDFMAPipe(val latency: Int) extends Component
+class FPUDFMAPipe(val latency: Int) extends Component
 {
   val io = new ioFMA(65)

@@ -452,7 +452,7 @@ class rocketFPUDFMAPipe(val latency: Int) extends Component
   io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits
 }
 
-class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component
+class FPU(sfma_latency: Int, dfma_latency: Int) extends Component
 {
   val io = new Bundle {
     val ctrl = new ioCtrlFPU().flip

@@ -470,7 +470,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component
   val killm = io.ctrl.killm || io.ctrl.nack_mem
   val wb_reg_valid = Reg(mem_reg_valid && !killm, resetVal = Bool(false))
 
-  val fp_decoder = new rocketFPUDecoder
+  val fp_decoder = new FPUDecoder
   fp_decoder.io.inst := io.dpath.inst
   val ctrl = RegEn(fp_decoder.io.sigs, io.ctrl.valid)

@@ -530,7 +530,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component
   val cmd_fma = mem_ctrl.cmd === FCMD_MADD || mem_ctrl.cmd === FCMD_MSUB ||
                 mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB
   val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB
-  val sfma = new rocketFPUSFMAPipe(sfma_latency)
+  val sfma = new FPUSFMAPipe(sfma_latency)
   sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single
   sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1)
   sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2)

@@ -540,7 +540,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component
   io.sfma.out := sfma.io.out
   io.sfma.exc := sfma.io.exc
 
-  val dfma = new rocketFPUDFMAPipe(dfma_latency)
+  val dfma = new FPUDFMAPipe(dfma_latency)
   dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single
   dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1)
   dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2)


@@ -29,9 +29,7 @@ case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached,
 class FrontendReq extends Bundle {
   val pc = UFix(width = VADDR_BITS+1)
-  val status = Bits(width = 32)
   val invalidate = Bool()
-  val invalidateTLB = Bool()
   val mispredict = Bool()
   val taken = Bool()
   val currentpc = UFix(width = VADDR_BITS+1)

@@ -99,14 +97,13 @@ class Frontend(implicit c: ICacheConfig) extends Component
   btb.io.clr := !io.cpu.req.bits.taken
   btb.io.correct_pc := io.cpu.req.bits.currentpc
   btb.io.correct_target := io.cpu.req.bits.pc
-  btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.req.bits.invalidateTLB
+  btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.ptw.invalidate
 
   tlb.io.ptw <> io.cpu.ptw
   tlb.io.req.valid := !stall && !icmiss
   tlb.io.req.bits.vpn := s1_pc >> UFix(PGIDX_BITS)
-  tlb.io.req.bits.status := io.cpu.req.bits.status
   tlb.io.req.bits.asid := UFix(0)
-  tlb.io.req.bits.invalidate := io.cpu.req.bits.invalidateTLB
+  tlb.io.req.bits.passthrough := Bool(false)
   tlb.io.req.bits.instruction := Bool(true)
 
   icache.io.mem <> io.mem


@@ -3,16 +3,18 @@ package rocket
 import Chisel._
 import Constants._
 import uncore._
+import Util._
 
 case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy,
-                        nmshr: Int, nsecondary: Int, nsdq: Int,
+                        nmshr: Int, nrpq: Int, nsdq: Int,
                         reqtagbits: Int = -1)
 {
   require(isPow2(sets))
   require(isPow2(ways)) // TODO: relax this
   def lines = sets*ways
   def dm = ways == 1
-  def ppnbits = PPN_BITS
+  def ppnbits = PADDR_BITS - PGIDX_BITS
+  def vpnbits = VADDR_BITS - PGIDX_BITS
   def pgidxbits = PGIDX_BITS
   def offbits = OFFSET_BITS
   def paddrbits = ppnbits + pgidxbits

@@ -161,7 +163,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component {
   val req_sec_val = Bool(INPUT)
   val req_sec_rdy = Bool(OUTPUT)
   val req_bits = new MSHRReq().asInput
-  val req_sdq_id = UFix(INPUT, log2Up(NSDQ))
+  val req_sdq_id = UFix(INPUT, log2Up(conf.nsdq))
 
   val idx_match = Bool(OUTPUT)
   val idx = Bits(OUTPUT, conf.idxbits)

@@ -194,7 +196,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component {
   val idx_match = req.idx === io.req_bits.idx
   val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits))
 
-  val rpq = (new Queue(NRPQ)) { new RPQEntry }
+  val rpq = (new Queue(conf.nrpq)) { new RPQEntry }
   rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq
   rpq.io.enq.bits := io.req_bits
   rpq.io.enq.bits.sdq_id := io.req_sdq_id

@@ -312,24 +314,24 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component {
     val cpu_resp_tag = Bits(OUTPUT, conf.reqtagbits)
   }
 
-  val sdq_val = Reg(resetVal = Bits(0, NSDQ))
-  val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0))
+  val sdq_val = Reg(resetVal = Bits(0, conf.nsdq))
+  val sdq_alloc_id = PriorityEncoder(~sdq_val(conf.nsdq-1,0))
   val sdq_rdy = !sdq_val.andR
   val (req_read, req_write) = cpuCmdToRW(io.req.bits.cmd)
   val sdq_enq = io.req.valid && io.req.ready && req_write
-  val sdq = Mem(NSDQ) { io.req.bits.data.clone }
+  val sdq = Mem(conf.nsdq) { io.req.bits.data.clone }
   when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data }
 
-  val idxMatch = Vec(NMSHR) { Bool() }
-  val tagList = Vec(NMSHR) { Bits() }
-  val wbTagList = Vec(NMSHR) { Bits() }
-  val memRespMux = Vec(NMSHR) { new DataArrayReq }
-  val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() }
-  val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit }
-  val mem_finish_arb = (new Arbiter(NMSHR)) { new TransactionFinish }
-  val wb_req_arb = (new Arbiter(NMSHR)) { new WritebackReq }
-  val replay_arb = (new Arbiter(NMSHR)) { new Replay() }
-  val alloc_arb = (new Arbiter(NMSHR)) { Bool() }
+  val idxMatch = Vec(conf.nmshr) { Bool() }
+  val tagList = Vec(conf.nmshr) { Bits() }
+  val wbTagList = Vec(conf.nmshr) { Bits() }
+  val memRespMux = Vec(conf.nmshr) { new DataArrayReq }
+  val meta_req_arb = (new Arbiter(conf.nmshr)) { new MetaArrayReq() }
+  val mem_req_arb = (new Arbiter(conf.nmshr)) { new TransactionInit }
+  val mem_finish_arb = (new Arbiter(conf.nmshr)) { new TransactionFinish }
+  val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq }
+  val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() }
+  val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() }
 
   val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.tag
   val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.tag

@@ -341,7 +343,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component {
   var writeback_probe_rdy = Bool(true)
   var refill_probe_rdy = Bool(true)
 
-  for (i <- 0 to NMSHR-1) {
+  for (i <- 0 to conf.nmshr-1) {
     val mshr = new MSHR(i)
 
     idxMatch(i) := mshr.io.idx_match

@@ -400,8 +402,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component {
   val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd)
   val sdq_free = replay.valid && replay.ready && replay_write
-  sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, NSDQ)) |
-             PriorityEncoderOH(~sdq_val(NSDQ-1,0)) & Fill(NSDQ, sdq_enq && io.req.bits.tag_miss)
+  sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, conf.nsdq)) |
+             PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq && io.req.bits.tag_miss)
   val sdq_rdata = Reg() { io.req.bits.data.clone }
   sdq_rdata := sdq(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id))
   io.data_req.bits.data := sdq_rdata

@@ -711,8 +713,8 @@ class AMOALU extends Component {
 class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle {
   val kill = Bool()
   val typ = Bits(width = 3)
-  val idx = Bits(width = conf.pgidxbits)
-  val ppn = Bits(width = conf.ppnbits)
+  val phys = Bool()
+  val addr = UFix(width = conf.ppnbits.max(conf.vpnbits+1) + conf.pgidxbits)
   val data = Bits(width = conf.databits)
   val tag = Bits(width = conf.reqtagbits)
   val cmd = Bits(width = 4)

@@ -739,6 +741,7 @@ class AlignmentExceptions extends Bundle {
 class HellaCacheExceptions extends Bundle {
   val ma = new AlignmentExceptions
+  val pf = new AlignmentExceptions
 }
 
 // interface between D$ and processor/DTLB

@@ -746,6 +749,7 @@ class ioHellaCache(implicit conf: DCacheConfig) extends Bundle {
   val req = (new FIFOIO){ new HellaCacheReq }
   val resp = (new PipeIO){ new HellaCacheResp }.flip
   val xcpt = (new HellaCacheExceptions).asInput
+  val ptw = new IOTLBPTW().flip
 }
 
 class HellaCache(implicit conf: DCacheConfig) extends Component {

@@ -768,6 +772,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
   val early_nack = Reg { Bool() }
   val r_cpu_req_val_ = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false))
   val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack
+  val r_cpu_req_phys = Reg() { Bool() }
+  val r_cpu_req_vpn = Reg() { UFix() }
   val r_cpu_req_idx = Reg() { Bits() }
   val r_cpu_req_cmd = Reg() { Bits() }
   val r_cpu_req_type = Reg() { Bits() }

@@ -799,6 +805,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
   val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch
   val nack_hit = Bool()
 
+  val dtlb = new TLB(8)
+  dtlb.io.ptw <> io.cpu.ptw
+  dtlb.io.req.valid := r_cpu_req_val_ && r_req_readwrite && !r_cpu_req_phys
+  dtlb.io.req.bits.passthrough := r_cpu_req_phys
+  dtlb.io.req.bits.asid := UFix(0)
+  dtlb.io.req.bits.vpn := r_cpu_req_vpn
+  dtlb.io.req.bits.instruction := Bool(false)
+
   val wb = new WritebackUnit
   val prober = new ProbeUnit
   val mshr = new MSHRFile

@@ -812,7 +826,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
   flusher.io.mshr_req.ready := mshr.io.req.ready
 
   when (io.cpu.req.valid) {
-    r_cpu_req_idx := io.cpu.req.bits.idx
+    r_cpu_req_phys := io.cpu.req.bits.phys
+    r_cpu_req_vpn := io.cpu.req.bits.addr >> taglsb
+    r_cpu_req_idx := io.cpu.req.bits.addr(indexmsb,0)
     r_cpu_req_cmd := io.cpu.req.bits.cmd
     r_cpu_req_type := io.cpu.req.bits.typ
     r_cpu_req_tag := io.cpu.req.bits.tag

@@ -839,8 +855,10 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
     (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) ||
     ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0)));
 
-  io.cpu.xcpt.ma.ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned
-  io.cpu.xcpt.ma.st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned
+  io.cpu.xcpt.ma.ld := r_cpu_req_val_ && r_req_read && misaligned
+  io.cpu.xcpt.ma.st := r_cpu_req_val_ && r_req_write && misaligned
+  io.cpu.xcpt.pf.ld := r_cpu_req_val_ && r_req_read && dtlb.io.resp.xcpt_ld
+  io.cpu.xcpt.pf.st := r_cpu_req_val_ && r_req_write && dtlb.io.resp.xcpt_st
 
   // tags
   val meta = new MetaDataArrayArray(lines)

@@ -855,11 +873,11 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
   // cpu tag check
   meta_arb.io.in(3).valid := io.cpu.req.valid
-  meta_arb.io.in(3).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb)
+  meta_arb.io.in(3).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb)
   meta_arb.io.in(3).bits.rw := Bool(false)
   meta_arb.io.in(3).bits.way_en := Fix(-1)
   val early_tag_nack = !meta_arb.io.in(3).ready
-  val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), io.cpu.req.bits.ppn)
+  val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), dtlb.io.resp.ppn)
   val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb)
   val tag_match_arr = (0 until conf.ways).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag))
   val tag_match = Cat(Bits(0),tag_match_arr:_*).orR

@@ -892,8 +910,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
   data_arb.io.in(0).valid := io.mem.xact_rep.valid && conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits)
 
   // load hits
-  data_arb.io.in(4).bits.offset := io.cpu.req.bits.idx(offsetmsb,ramindexlsb)
-  data_arb.io.in(4).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb)
+  data_arb.io.in(4).bits.offset := io.cpu.req.bits.addr(offsetmsb,ramindexlsb)
+  data_arb.io.in(4).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb)
   data_arb.io.in(4).bits.rw := Bool(false)
   data_arb.io.in(4).valid := io.cpu.req.valid && req_read
   data_arb.io.in(4).bits.way_en := Fix(-1) // intiate load on all ways, mux after tag check

@@ -1015,13 +1033,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component {
   val pending_fence = Reg(resetVal = Bool(false))
   pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !mshr.io.fence_rdy
 
   nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy ||
-              p_store_idx_match && meta.io.state_req.valid
+              p_store_idx_match && meta.io.state_req.valid ||
+              !r_cpu_req_phys && dtlb.io.resp.miss
   val nack_miss = !mshr.io.req.ready
   val nack_flush = !mshr.io.fence_rdy && (r_req_fence || r_req_flush) ||
                    !flushed && r_req_flush
   val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush
 
-  io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence
+  io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence && (dtlb.io.req.ready || io.cpu.req.bits.phys)
   io.cpu.resp.valid := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val
   io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack
   io.cpu.resp.bits.replay := mshr.io.cpu_resp_val


@@ -5,16 +5,31 @@ import Node._
 import Constants._
 import scala.math._
 
-class ioPTW(n: Int)(implicit conf: RocketConfiguration) extends Bundle
-{
-  val requestor = Vec(n) { new IOTLBPTW }.flip
-  val mem = new ioHellaCache()(conf.dcache)
-  val ptbr = UFix(INPUT, PADDR_BITS)
+class IOTLBPTW extends Bundle {
+  val req = new FIFOIO()(UFix(width = VPN_BITS))
+  val resp = new PipeIO()(new Bundle {
+    val error = Bool()
+    val ppn = UFix(width = PPN_BITS)
+    val perm = Bits(width = PERM_BITS)
+  }).flip
+  val status = Bits(INPUT, width = 32)
+  val invalidate = Bool(INPUT)
+}
+
+class IODatapathPTW extends Bundle {
+  val ptbr = UFix(INPUT, PADDR_BITS)
+  val invalidate = Bool(INPUT)
+  val status = Bits(INPUT, 32)
 }
 
 class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component
 {
-  val io = new ioPTW(n)
+  val io = new Bundle {
+    val requestor = Vec(n) { new IOTLBPTW }.flip
+    val mem = new ioHellaCache()(conf.dcache)
+    val dpath = new IODatapathPTW
+  }
 
   val levels = 3
   val bitsPerLevel = VPN_BITS/levels

@@ -27,7 +42,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component
   val r_req_vpn = Reg() { Bits() }
   val r_req_dest = Reg() { Bits() }
-  val req_addr = Reg() { Bits() }
+  val req_addr = Reg() { UFix() }
   val r_resp_ppn = Reg() { Bits() };
   val r_resp_perm = Reg() { Bits() };

@@ -41,21 +56,21 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component
   when (arb.io.out.fire()) {
     r_req_vpn := arb.io.out.bits
     r_req_dest := arb.io.chosen
-    req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3))
+    req_addr := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), UFix(0,3))
   }
 
   val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false))
   when (dmem_resp_val) {
-    req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3))
+    req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, UFix(0,3)).toUFix
     r_resp_perm := io.mem.resp.bits.data_subword(9,4);
     r_resp_ppn := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS);
   }
 
   io.mem.req.valid := state === s_req
+  io.mem.req.bits.phys := Bool(true)
   io.mem.req.bits.cmd := M_XRD
   io.mem.req.bits.typ := MT_D
-  io.mem.req.bits.idx := req_addr(PGIDX_BITS-1,0)
-  io.mem.req.bits.ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS))
+  io.mem.req.bits.addr := req_addr
   io.mem.req.bits.kill := Bool(false)
 
   val resp_val = state === s_done || state === s_error

@@ -73,6 +88,8 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component
     io.requestor(i).resp.bits.error := resp_err
     io.requestor(i).resp.bits.perm := r_resp_perm
     io.requestor(i).resp.bits.ppn := resp_ppn.toUFix
+    io.requestor(i).invalidate := io.dpath.invalidate
+    io.requestor(i).status := io.dpath.status
   }
 
   // control state machine


@@ -13,7 +13,8 @@ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached,
 class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal)
 {
-  implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(DMEM_PORTS))
+  val memPorts = if (HAVE_VEC) 3 else 2
+  implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts))
   implicit val conf = confIn.copy(dcache = dcConf)
 
   val io = new Bundle {

@@ -21,13 +22,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon
     val host = new ioHTIF(conf.ntiles)
   }
 
-  val cpu = new rocketProc
+  val core = new Core
   val icache = new Frontend()(confIn.icache)
   val dcache = new HellaCache
 
-  val arbiter = new MemArbiter(DMEM_PORTS)
-  arbiter.io.requestor(DMEM_DCACHE) <> dcache.io.mem
-  arbiter.io.requestor(DMEM_ICACHE) <> icache.io.mem
+  val arbiter = new MemArbiter(memPorts)
+  arbiter.io.requestor(0) <> dcache.io.mem
+  arbiter.io.requestor(1) <> icache.io.mem
 
   io.tilelink.xact_init <> arbiter.io.mem.xact_init
   io.tilelink.xact_init_data <> dcache.io.mem.xact_init_data

@@ -38,14 +39,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon
   io.tilelink.probe_rep <> dcache.io.mem.probe_rep
   io.tilelink.probe_rep_data <> dcache.io.mem.probe_rep_data
 
-  if (HAVE_VEC)
-  {
+  if (HAVE_VEC) {
     val vicache = new Frontend()(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB)
-    arbiter.io.requestor(DMEM_VICACHE) <> vicache.io.mem
-    cpu.io.vimem <> vicache.io.cpu
+    arbiter.io.requestor(2) <> vicache.io.mem
+    core.io.vimem <> vicache.io.cpu
   }
 
-  cpu.io.host <> io.host
-  cpu.io.imem <> icache.io.cpu
-  cpu.io.dmem <> dcache.io.cpu
+  core.io.host <> io.host
+  core.io.imem <> icache.io.cpu
+  core.io.dmem <> dcache.io.cpu
 }


@@ -66,21 +66,11 @@ class PseudoLRU(n: Int)
   }
 }
 
-class IOTLBPTW extends Bundle {
-  val req = new FIFOIO()(UFix(width = VPN_BITS))
-  val resp = new PipeIO()(new Bundle {
-    val error = Bool()
-    val ppn = UFix(width = PPN_BITS)
-    val perm = Bits(width = PERM_BITS)
-  }).flip
-}
-
 class TLBReq extends Bundle
 {
   val asid = UFix(width = ASID_BITS)
   val vpn = UFix(width = VPN_BITS+1)
-  val status = Bits(width = 32)
-  val invalidate = Bool()
+  val passthrough = Bool()
   val instruction = Bool()
 }

@@ -116,7 +106,7 @@ class TLB(entries: Int) extends Component
   when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn }
 
   val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUFix
-  tag_cam.io.clear := io.req.bits.invalidate
+  tag_cam.io.clear := io.ptw.invalidate
   tag_cam.io.clear_hit := io.req.fire() && Mux(io.req.bits.instruction, io.resp.xcpt_if, io.resp.xcpt_ld && io.resp.xcpt_st)
   tag_cam.io.tag := lookup_tag
   tag_cam.io.write := state === s_wait && io.ptw.resp.valid

@@ -148,8 +138,8 @@ class TLB(entries: Int) extends Component
   val plru = new PseudoLRU(entries)
   val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace)
 
-  val status_s = io.req.bits.status(SR_S) // user/supervisor mode
-  val status_vm = io.req.bits.status(SR_VM) // virtual memory enable
+  val status_s = io.ptw.status(SR_S) // user/supervisor mode
+  val status_vm = io.ptw.status(SR_VM) // virtual memory enable
   val bad_va = io.req.bits.vpn(VPN_BITS) != io.req.bits.vpn(VPN_BITS-1)
 
   val tlb_hit = status_vm && tag_hit
   val tlb_miss = status_vm && !tag_hit && !bad_va

@@ -163,7 +153,7 @@ class TLB(entries: Int) extends Component
   io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, sw_array(tag_hit_addr), uw_array(tag_hit_addr))
   io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, sx_array(tag_hit_addr), ux_array(tag_hit_addr))
   io.resp.miss := tlb_miss
-  io.resp.ppn := Mux(status_vm, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0))
+  io.resp.ppn := Mux(status_vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0))
   io.resp.hit_idx := tag_cam.io.hits
 
   io.ptw.req.valid := state === s_request

@@ -175,15 +165,15 @@ class TLB(entries: Int) extends Component
     r_refill_waddr := repl_waddr
   }
   when (state === s_request) {
-    when (io.req.bits.invalidate) {
+    when (io.ptw.invalidate) {
       state := s_ready
     }
     when (io.ptw.req.ready) {
       state := s_wait
-      when (io.req.bits.invalidate) { state := s_wait_invalidate }
+      when (io.ptw.invalidate) { state := s_wait_invalidate }
     }
   }
-  when (state === s_wait && io.req.bits.invalidate) {
+  when (state === s_wait && io.ptw.invalidate) {
     state := s_wait_invalidate
   }
   when ((state === s_wait || state === s_wait_invalidate) && io.ptw.resp.valid) {

@@ -204,10 +194,6 @@ class ioDTLB_CPU_resp extends TLBResp(1)
 class ioDTLB extends Bundle
 {
-  // status bits (from PCR), to check current permission and whether VM is enabled
-  val status = Bits(INPUT, 32)
-  // invalidate all TLB entries
-  val invalidate = Bool(INPUT)
   val cpu_req = new ioDTLB_CPU_req().flip
   val cpu_resp = new ioDTLB_CPU_resp()
   val ptw = new IOTLBPTW

@@ -225,8 +211,7 @@ class rocketTLB(entries: Int) extends Component
   val tlb = new TLB(entries)
   tlb.io.req.valid := r_cpu_req_val && !io.cpu_req.bits.kill
   tlb.io.req.bits.instruction := Bool(false)
-  tlb.io.req.bits.invalidate := io.invalidate
-  tlb.io.req.bits.status := io.status
+  tlb.io.req.bits.passthrough := Bool(false)
   tlb.io.req.bits.vpn := r_cpu_req_vpn
   tlb.io.req.bits.asid := r_cpu_req_asid