diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala
index 3ad0e750..c876a243 100644
--- a/rocket/src/main/scala/arbiter.scala
+++ b/rocket/src/main/scala/arbiter.scala
@@ -39,6 +39,7 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Modu
     val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i)
     resp.valid := io.mem.resp.valid && tag_hit
     io.requestor(i).xcpt := io.mem.xcpt
+    io.requestor(i).ordered := io.mem.ordered
     resp.bits := io.mem.resp.bits
     resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n))
     resp.bits.nack := io.mem.resp.bits.nack && tag_hit
diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala
index 89769f71..2a4da7af 100644
--- a/rocket/src/main/scala/ctrl.scala
+++ b/rocket/src/main/scala/ctrl.scala
@@ -282,25 +282,25 @@ object RoCCDecode extends DecodeConstants
     CUSTOM0->           List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM0_RS1->       List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM0_RS1_RS2->   List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
-    CUSTOM0->           List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
+    CUSTOM0_RD->        List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM0_RD_RS1->    List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM0_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM1->           List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM1_RS1->       List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM1_RS1_RS2->   List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
-    CUSTOM1->           List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
+    CUSTOM1_RD->        List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM1_RD_RS1->    List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM1_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM2->           List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM2_RS1->       List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM2_RS1_RS2->   List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
-    CUSTOM2->           List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
+    CUSTOM2_RD->        List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM2_RD_RS1->    List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM2_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM3->           List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM3_RS1->       List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM3_RS1_RS2->   List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
-    CUSTOM3->           List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
+    CUSTOM3_RD->        List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM3_RD_RS1->    List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
     CUSTOM3_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N))
 }
@@ -639,7 +639,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
      io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr ||
      io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr ||
      io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr)
-  val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val) ||
+  val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val || ex_reg_rocc_val) ||
                      fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val)
 
   // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage.
@@ -655,7 +655,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
      io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr ||
      io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr ||
      io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr)
-  val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) ||
+  val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val || mem_reg_rocc_val) ||
                       fp_data_hazard_mem && mem_reg_fp_val
   id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem)
 
@@ -669,7 +669,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
      io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr ||
      io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr ||
      io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr)
-  val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val) ||
+  val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val || wb_reg_rocc_val) ||
                      fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val)
 
   io.dpath.mem_ll_bypass_rs1 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr1
diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala
index 502c1fee..6c8432d8 100644
--- a/rocket/src/main/scala/decode.scala
+++ b/rocket/src/main/scala/decode.scala
@@ -5,12 +5,13 @@ import Node._
 
 object DecodeLogic
 {
-  def term(b: Literal) = {
-    if (b.isZ) {
-      var (bits, mask, swidth) = Literal.parseLit(b.toString)
-      new Term(BigInt(bits, 2), BigInt(2).pow(b.width)-(BigInt(mask, 2)+1))
+  def term(b: Bits) = {
+    val lit = b.litOf
+    if (lit.isZ) {
+      var (bits, mask, swidth) = Literal.parseLit(lit.toString)
+      new Term(BigInt(bits, 2), BigInt(2).pow(lit.width)-(BigInt(mask, 2)+1))
     } else {
-      new Term(b.value)
+      new Term(lit.value)
     }
   }
   def logic(addr: Bits, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bits], terms: Seq[Term]) = {
@@ -24,18 +25,17 @@ object DecodeLogic
     var map = mapping
     var cache = scala.collection.mutable.Map[Term,Bits]()
     default map { d =>
-      val dlit = d.litOf
-      val dterm = term(dlit)
+      val dterm = term(d)
       val (keys, values) = map.unzip
       val addrWidth = keys.map(_.getWidth).max
-      val terms = keys.toList.map(k => term(k.litOf))
-      val termvalues = terms zip values.toList.map(v => term(v.head.litOf))
+      val terms = keys.toList.map(k => term(k))
+      val termvalues = terms zip values.toList.map(v => term(v.head))
 
-      for (t <- terms.tails; if !t.isEmpty)
+      for (t <- keys.zip(terms).tails; if !t.isEmpty)
         for (u <- t.tail)
-          assert(!t.head.intersects(u), "DecodeLogic: keys " + t + " and " + u + " overlap")
+          assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap")
 
-      val result = (0 until math.max(dlit.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) =>
+      val result = (0 until math.max(d.litOf.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) =>
         val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1)
         val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1)
         val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1)
diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala
index 898e5e5a..7b4c7bc6 100644
--- a/rocket/src/main/scala/dpath.scala
+++ b/rocket/src/main/scala/dpath.scala
@@ -244,7 +244,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
   val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
   val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
   val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1)
-  val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu
+  val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
+  val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data
 
   val mem_ll_wdata = Bits()
   mem_ll_wdata := div.io.resp.bits.data
@@ -259,7 +260,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
       io.ctrl.mem_ll_wb := Bool(true)
     }
   }
-  when (dmem_resp_replay) {
+  when (dmem_resp_replay && dmem_resp_xpu) {
     div.io.resp.ready := Bool(false)
     if (!conf.rocc.isEmpty)
       io.rocc.resp.ready := Bool(false)
@@ -269,7 +270,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
   }
   when (io.ctrl.mem_ll_waddr === UInt(0)) { io.ctrl.mem_ll_wb := Bool(false) }
 
-  io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu
+  io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
   io.fpu.dmem_resp_data := io.dmem.resp.bits.data
   io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
   io.fpu.dmem_resp_tag := dmem_resp_waddr
@@ -297,7 +298,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
   io.ctrl.wb_waddr := wb_reg_waddr
 
   // scoreboard clear (for div/mul and D$ load miss writebacks)
-  io.ctrl.fp_sboard_clr := io.dmem.resp.bits.replay && dmem_resp_fpu
+  io.ctrl.fp_sboard_clr := dmem_resp_replay && dmem_resp_fpu
   io.ctrl.fp_sboard_clra := dmem_resp_waddr
 
   // processor control regfile write
diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala
index 565059e4..251aa17b 100644
--- a/rocket/src/main/scala/nbdcache.scala
+++ b/rocket/src/main/scala/nbdcache.scala
@@ -698,6 +698,7 @@ class HellaCacheResp(implicit val conf: DCacheConfig) extends DCacheBundle {
   val nack = Bool() // comes 2 cycles after req.fire
   val replay = Bool()
   val typ = Bits(width = 3)
+  val has_data = Bool()
   val data = Bits(width = conf.databits)
   val data_subword = Bits(width = conf.databits)
   val tag = Bits(width = conf.reqtagbits)
@@ -1027,11 +1028,11 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends
     io.cpu.req.ready := Bool(false)
   }
 
-  val s2_do_resp = isRead(s2_req.cmd) || s2_sc
-  io.cpu.resp.valid := s2_do_resp && (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
+  io.cpu.resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
   io.cpu.resp.bits.nack := s2_valid && s2_nack
   io.cpu.resp.bits := s2_req
-  io.cpu.resp.bits.replay := s2_replay && s2_do_resp
+  io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc
+  io.cpu.resp.bits.replay := s2_replay
   io.cpu.resp.bits.data := loadgen.word
   io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte)
   io.cpu.resp.bits.store_data := s2_req.data
@@ -1039,3 +1040,84 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends
 
   io.mem.grant_ack <> mshrs.io.mem_finish
 }
+
+// exposes a sane decoupled request interface
+class SimpleHellaCacheIF(implicit conf: DCacheConfig) extends Module
+{
+  val io = new Bundle {
+    val requestor = new HellaCacheIO().flip
+    val cache = new HellaCacheIO
+  }
+
+  val replaying_cmb = Bool()
+  val replaying = Reg(next = replaying_cmb, init = Bool(false))
+  replaying_cmb := replaying
+
+  val replayq1 = Module(new Queue(new HellaCacheReq, 1, flow = true))
+  val replayq2 = Module(new Queue(new HellaCacheReq, 1))
+  val req_arb = Module(new Arbiter(new HellaCacheReq, 2))
+
+  req_arb.io.in(0) <> replayq1.io.deq
+  req_arb.io.in(1).valid := !replaying_cmb && io.requestor.req.valid
+  req_arb.io.in(1).bits := io.requestor.req.bits
+  io.requestor.req.ready := !replaying_cmb && req_arb.io.in(1).ready
+
+  val s2_nack = io.cache.resp.bits.nack
+  val s3_nack = Reg(next=s2_nack)
+
+  val s0_req_fire = io.cache.req.fire()
+  val s1_req_fire = Reg(next=s0_req_fire)
+  val s2_req_fire = Reg(next=s1_req_fire)
+
+  io.cache.req.bits.kill := s2_nack
+  io.cache.req.bits.phys := Bool(true)
+  io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
+  io.cache.req <> req_arb.io.out
+
+  // replay queues
+  // replayq1 holds the older request
+  // replayq2 holds the newer request (for the first nack)
+  // we need to split the queues like this for the case where the older request
+  // goes through but gets nacked, while the newer request stalls
+  // if this happens, the newer request will go through before the older
+  // request
+  // we don't need to check replayq1.io.enq.ready and replayq2.io.enq.ready as
+  // there will only be two requests going through at most
+
+  // stash d$ request in stage 2 if nacked (older request)
+  replayq1.io.enq.valid := Bool(false)
+  replayq1.io.enq.bits.cmd := io.cache.resp.bits.cmd
+  replayq1.io.enq.bits.typ := io.cache.resp.bits.typ
+  replayq1.io.enq.bits.addr := io.cache.resp.bits.addr
+  replayq1.io.enq.bits.data := io.cache.resp.bits.store_data
+  replayq1.io.enq.bits.tag := io.cache.resp.bits.tag
+
+  // stash d$ request in stage 1 if nacked (newer request)
+  replayq2.io.enq.valid := s2_req_fire && s3_nack
+  replayq2.io.enq.bits.data := io.cache.resp.bits.store_data
+  replayq2.io.enq.bits <> io.cache.resp.bits
+  replayq2.io.deq.ready := Bool(false)
+
+  when (s2_nack) {
+    replayq1.io.enq.valid := Bool(true)
+    replaying_cmb := Bool(true)
+  }
+
+  // when replaying request got sunk into the d$
+  when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !s2_nack) {
+    // see if there's a stashed request in replayq2
+    when (replayq2.io.deq.valid) {
+      replayq1.io.enq.valid := Bool(true)
+      replayq1.io.enq.bits.cmd := replayq2.io.deq.bits.cmd
+      replayq1.io.enq.bits.typ := replayq2.io.deq.bits.typ
+      replayq1.io.enq.bits.addr := replayq2.io.deq.bits.addr
+      replayq1.io.enq.bits.data := replayq2.io.deq.bits.data
+      replayq1.io.enq.bits.tag := replayq2.io.deq.bits.tag
+      replayq2.io.deq.ready := Bool(true)
+    } .otherwise {
+      replaying_cmb := Bool(false)
+    }
+  }
+
+  io.requestor.resp := io.cache.resp
+}
diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala
index cbff596d..ac314388 100644
--- a/rocket/src/main/scala/rocc.scala
+++ b/rocket/src/main/scala/rocc.scala
@@ -2,6 +2,7 @@ package rocket
 
 import Chisel._
 import Node._
+import uncore._
 
 class RoCCInstruction extends Bundle
 {
@@ -36,35 +37,77 @@ class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle
 {
   val cmd = Decoupled(new RoCCCommand).flip
   val resp = Decoupled(new RoCCResponse)
+  val mem = new HellaCacheIO()(conf.dcache)
   val busy = Bool(OUTPUT)
   val interrupt = Bool(OUTPUT)
 
   override def clone = new RoCCInterface().asInstanceOf[this.type]
 }
 
-abstract class RoCC(implicit conf: RocketConfiguration) extends Module
+abstract class RoCC(conf: RocketConfiguration) extends Module
 {
-  val io = new RoCCInterface
+  val io = new RoCCInterface()(conf)
 }
 
-class AccumulatorExample(implicit conf: RocketConfiguration) extends RoCC
+// Example accelerator: n registers, selected by the low log2Up(n) bits of the instruction's rs2 field.
+// funct 0 writes the rs1 value, funct 3 adds the rs1 value, funct 2 loads from the address in rs1.
+// funct 1 (doRead) is decoded but unused: a response with the register value is sent whenever inst.xd is set.
+class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf)
 {
-  val regfile = Mem(UInt(width = conf.xprlen), 4)
+  val n = 4
+  val regfile = Mem(UInt(width = conf.xprlen), n)
+  val busy = Vec.fill(n){Reg(init=Bool(false))}
 
-  val funct = io.cmd.bits.inst.funct
-  val addr = io.cmd.bits.inst.rs2
-  val addend = io.cmd.bits.rs1
+  val cmd = Queue(io.cmd)
+  val funct = cmd.bits.inst.funct
+  val addr = cmd.bits.inst.rs2(log2Up(n)-1,0)
+  val doWrite = funct === UInt(0)
+  val doRead = funct === UInt(1)
+  val doLoad = funct === UInt(2)
+  val doAccum = funct === UInt(3)
+  val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0)
+
+  // datapath
+  val addend = cmd.bits.rs1
   val accum = regfile(addr)
-  val wdata = Mux(funct === UInt(0), addend, accum + addend)
+  val wdata = Mux(doWrite, addend, accum + addend)
 
-  when (io.cmd.fire() && (funct === UInt(1) || funct === UInt(3))) {
+  when (cmd.fire() && (doWrite || doAccum)) {
     regfile(addr) := wdata
   }
 
-  io.cmd.ready := io.resp.ready
-  io.resp.valid := io.cmd.valid && io.cmd.bits.inst.xd
-  io.resp.bits.rd := io.cmd.bits.inst.rd
+  when (io.mem.resp.valid) {
+    regfile(memRespTag) := io.mem.resp.bits.data
+  }
+
+  // control
+  when (io.mem.req.fire()) {
+    busy(addr) := Bool(true)
+  }
+
+  when (io.mem.resp.valid) {
+    busy(memRespTag) := Bool(false)
+  }
+
+  val doResp = cmd.bits.inst.xd
+  val stallReg = busy(addr)
+  val stallLoad = doLoad && !io.mem.req.ready
+  val stallResp = doResp && !io.resp.ready
+
+  cmd.ready := !stallReg && !stallLoad && !stallResp
+
+  io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad
+  io.resp.bits.rd := cmd.bits.inst.rd
   io.resp.bits.data := accum
+
   io.busy := Bool(false)
   io.interrupt := Bool(false)
+
+  io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp
+  io.mem.req.bits.addr := addend
+  io.mem.req.bits.tag := addr // register index; read back as memRespTag from resp.bits.tag
+  io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
+  io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
+  io.mem.req.bits.data := Bits(0) // we're not performing any stores...
+  io.mem.req.bits.phys := Bool(true) // don't perform address translation
 }
diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala
index 6be26fc7..022fbf23 100644
--- a/rocket/src/main/scala/tile.scala
+++ b/rocket/src/main/scala/tile.scala
@@ -6,7 +6,7 @@ import Util._
 
 case class RocketConfiguration(tl: TileLinkConfiguration,
                                icache: ICacheConfig, dcache: DCacheConfig,
-                               fpu: Boolean, rocc: Option[RoCC] = None,
+                               fpu: Boolean, rocc: Option[RocketConfiguration => RoCC] = None,
                                fastLoadWord: Boolean = true,
                                fastLoadByte: Boolean = false,
                                fastMulDiv: Boolean = true)
@@ -23,7 +23,6 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
   val memPorts = 2
   val dcachePortId = 0
   val icachePortId = 1
-  val vicachePortId = 2
   implicit val tlConf = confIn.tl
   implicit val lnConf = confIn.tl.ln
   implicit val icConf = confIn.icache
@@ -48,6 +47,18 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
   ptw.io.requestor(0) <> icache.io.cpu.ptw
   ptw.io.requestor(1) <> dcache.io.cpu.ptw
 
+  if (!conf.rocc.isEmpty) {
+    val dcIF = Module(new SimpleHellaCacheIF)
+    val rocc = Module((conf.rocc.get)(conf))
+    dcIF.io.requestor <> rocc.io.mem
+    core.io.rocc <> rocc.io
+    dcacheArb.io.requestor(2) <> dcIF.io.cache
+  }
+
+  core.io.host <> io.host
+  core.io.imem <> icache.io.cpu
+  core.io.ptw <> ptw.io.dpath
+
   val memArb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts))
   memArb.io.in(dcachePortId) <> dcache.io.mem
   memArb.io.in(icachePortId) <> icache.io.mem
@@ -61,8 +72,4 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
   dcache.io.mem.release.meta.ready := io.tilelink.release.meta.ready
   io.tilelink.release.meta.bits := dcache.io.mem.release.meta.bits
   io.tilelink.release.meta.bits.payload.client_xact_id := Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client)
-
-  core.io.host <> io.host
-  core.io.imem <> icache.io.cpu
-  core.io.ptw <> ptw.io.dpath
 }