From 6d1bf5c014cf736f74ac8f7e261928c1b9628f46 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Nov 2015 18:13:33 -0800 Subject: [PATCH 1/7] Use generic LoadGen/StoreGen --- rocket/src/main/scala/nbdcache.scala | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 296b7420..ea07eecf 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -162,8 +162,8 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val req_cmd_sc = req.cmd === M_XSC val grant_word = Reg(UInt(width = wordBits)) - val storegen = new StoreGen64(req.typ, req.addr, req.data) - val loadgen = new LoadGen64(req.typ, req.addr, grant_word, req_cmd_sc) + val storegen = new StoreGen(req.typ, req.addr, req.data, 8) + val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc, 8) val beat_offset = req.addr(beatOffBits - 1, wordOffBits) val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) @@ -200,7 +200,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.resp.valid := (state === s_resp) io.resp.bits := req io.resp.bits.has_data := isRead(req.cmd) - io.resp.bits.data := loadgen.byte | req_cmd_sc + io.resp.bits.data := loadgen.data | req_cmd_sc io.resp.bits.store_data := req.data io.resp.bits.nack := Bool(false) io.resp.bits.replay := io.resp.valid @@ -815,11 +815,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { s2_req.cmd := s1_req.cmd } - val misaligned = - (((s1_req.typ === MT_H) || (s1_req.typ === MT_HU)) && (s1_req.addr(0) != Bits(0))) || - (((s1_req.typ === MT_W) || (s1_req.typ === MT_WU)) && (s1_req.addr(1,0) != Bits(0))) || - ((s1_req.typ === MT_D) && (s1_req.addr(2,0) != Bits(0))) - + val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), 8).misaligned io.cpu.xcpt.ma.ld := s1_read && misaligned io.cpu.xcpt.ma.st := s1_write && misaligned io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld @@ -1018,7 +1014,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // load data subword mux/sign extension val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) - val loadgen = new LoadGen64(s2_req.typ, s2_req.addr, s2_data_word, s2_sc) + val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, 8) amoalu.io.addr := s2_req.addr amoalu.io.cmd := s2_req.cmd @@ -1052,7 +1048,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable cache_resp.bits := s2_req cache_resp.bits.has_data := isRead(s2_req.cmd) - cache_resp.bits.data := loadgen.byte | s2_sc_fail + cache_resp.bits.data := loadgen.data | s2_sc_fail cache_resp.bits.store_data := s2_req.data cache_resp.bits.nack := s2_valid && s2_nack cache_resp.bits.replay := s2_replay @@ -1065,7 +1061,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { mshrs.io.resp.ready := !cache_pass io.cpu.resp := Mux(cache_pass, cache_resp, uncache_resp) - io.cpu.resp.bits.data_word_bypass := loadgen.word + io.cpu.resp.bits.data_word_bypass := loadgen.wordData io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid io.cpu.replay_next.valid := s1_replay && s1_read io.cpu.replay_next.bits := s1_req.tag From 4616db469530b16c21a0724963f8ea7416f5471e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Nov 2015 18:27:07 -0800 Subject: [PATCH 2/7] Make RegFile/ImmGen usable by zscale --- rocket/src/main/scala/rocket.scala | 87 +++++++++++++++--------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 6c245cf5..2cabc403 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -60,6 +60,47 @@ abstract class CoreModule(implicit val p: Parameters) extends Module abstract class CoreBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) with HasCoreParameters +class RegFile(n: Int, w: Int, zero: Boolean = false) { + private val rf = Mem(n, UInt(width = w)) + private def access(addr: UInt) = rf(~addr(log2Up(n)-1,0)) + private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() + private var canRead = true + def read(addr: UInt) = { + require(canRead) + reads += addr -> Wire(UInt()) + reads.last._2 := Mux(Bool(zero) && addr === UInt(0), UInt(0), access(addr)) + reads.last._2 + } + def write(addr: UInt, data: UInt) = { + canRead = false + when (addr != UInt(0)) { + access(addr) := data + for ((raddr, rdata) <- reads) + when (addr === raddr) { rdata := data } + } + } +} + +object ImmGen { + def apply(sel: UInt, inst: UInt) = { + val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt) + val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) + val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt) + val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0), + Mux(sel === IMM_UJ, inst(20).toSInt, + Mux(sel === IMM_SB, inst(7).toSInt, sign))) + val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25)) + val b4_1 = Mux(sel === IMM_U, Bits(0), + Mux(sel === IMM_S || sel === IMM_SB, inst(11,8), + Mux(sel === IMM_Z, inst(19,16), inst(24,21)))) + val b0 = Mux(sel === IMM_S, inst(7), + Mux(sel === IMM_I, inst(20), + Mux(sel === IMM_Z, inst(15), Bits(0)))) + + Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt + } +} + class Rocket(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { val host = new HtifIO @@ -131,7 +172,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val id_reg_fence = Reg(init=Bool(false)) val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2) val id_raddr = IndexedSeq(id_raddr1, id_raddr2) - val rf = new RegFile + val rf = new RegFile(31, xLen) val id_rs = id_raddr.map(rf.read _) val ctrl_killd = Wire(Bool()) @@ -189,7 +230,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val ex_reg_rs_msb = Reg(Vec(UInt(), id_raddr.size)) val ex_rs = for (i <- 0 until id_raddr.size) yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) - val ex_imm = imm(ex_ctrl.sel_imm, ex_reg_inst) + val ex_imm = ImmGen(ex_ctrl.sel_imm, ex_reg_inst) val ex_op1 = MuxLookup(ex_ctrl.sel_alu1, SInt(0), Seq( A1_RS1 -> ex_rs(0).toSInt, A1_PC -> ex_reg_pc.toSInt)) @@ -261,8 +302,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { // memory stage val mem_br_taken = mem_reg_wdata(0) val mem_br_target = mem_reg_pc.toSInt + - Mux(mem_ctrl.branch && mem_br_taken, imm(IMM_SB, mem_reg_inst), - Mux(mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4))) + Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst), + Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), SInt(4))) val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt val mem_npc = (Mux(mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)).toSInt, mem_br_target) & SInt(-2)).toUInt val mem_wrong_npc = mem_npc != ex_reg_pc || !ex_reg_valid @@ -541,24 +582,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) = targets.map(h => h._1 && cond(h._2)).reduce(_||_) - def imm(sel: UInt, inst: UInt) = { - val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt) - val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) - val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt) - val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0), - Mux(sel === IMM_UJ, inst(20).toSInt, - Mux(sel === IMM_SB, inst(7).toSInt, sign))) - val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25)) - val b4_1 = Mux(sel === IMM_U, Bits(0), - Mux(sel === IMM_S || sel === IMM_SB, inst(11,8), - Mux(sel === IMM_Z, inst(19,16), inst(24,21)))) - val b0 = Mux(sel === IMM_S, inst(7), - Mux(sel === IMM_I, inst(20), - Mux(sel === IMM_Z, inst(15), Bits(0)))) - - Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt - } - def vaSign(a0: UInt, ea: UInt) = { // efficient means to compress 64-bit VA into vaddrBits+1 bits // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) @@ -569,26 +592,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { e(0))) } - class RegFile { - private val rf = Mem(31, UInt(width = 64)) - private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() - private var canRead = true - def read(addr: UInt) = { - require(canRead) - reads += addr -> Wire(UInt()) - reads.last._2 := rf(~addr) - reads.last._2 - } - def write(addr: UInt, data: UInt) = { - canRead = false - when (addr != UInt(0)) { - rf(~addr) := data - for ((raddr, rdata) <- reads) - when (addr === raddr) { rdata := data } - } - } - } - class Scoreboard(n: Int) { def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr)) From 5294e94794e351e7e58e992676d969c78478169b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Nov 2015 18:28:14 -0800 Subject: [PATCH 3/7] Remove CSR back pressure ability We were using it for IPIs, but no longer need it. --- rocket/src/main/scala/csr.scala | 4 +--- rocket/src/main/scala/rocket.scala | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index efaf25b8..c9675527 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -78,7 +78,6 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle { val wdata = Bits(INPUT, xLen) } - val csr_replay = Bool(OUTPUT) val csr_stall = Bool(OUTPUT) val csr_xcpt = Bool(OUTPUT) val eret = Bool(OUTPUT) @@ -347,14 +346,13 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_sepc := reg_mepc } - assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: io.csr_replay :: Nil) <= 1, "these conditions must be mutually exclusive") + assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive") when (read_time >= reg_mtimecmp) { reg_mip.mtip := true } io.time := reg_cycle - io.csr_replay := false io.csr_stall := reg_wfi when (host_csr_req_fire && !host_csr_bits.rw && decoded_addr(CSRs.mtohost)) { reg_tohost := UInt(0) } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 2cabc403..53824d51 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -364,8 +364,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc - val replay_wb_common = - io.dmem.resp.bits.nack || wb_reg_replay || csr.io.csr_replay + val replay_wb_common = io.dmem.resp.bits.nack || wb_reg_replay val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt From e203b8b3787780471ed68c3315998e973bbe0d83 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Nov 2015 19:17:07 -0800 Subject: [PATCH 4/7] Make ALU generic for zscale --- rocket/src/main/scala/dpath_alu.scala | 50 ++++++++++++++++----------- rocket/src/main/scala/rocket.scala | 2 +- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 58942266..8aab0cfd 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -43,17 +43,15 @@ object ALU } import ALU._ -class ALUIO(implicit p: Parameters) extends CoreBundle()(p) { - val dw = Bits(INPUT, SZ_DW) - val fn = Bits(INPUT, SZ_ALU_FN) - val in2 = UInt(INPUT, xLen) - val in1 = UInt(INPUT, xLen) - val out = UInt(OUTPUT, xLen) - val adder_out = UInt(OUTPUT, xLen) -} - -class ALU(implicit p: Parameters) extends Module { - val io = new ALUIO +class ALU(xLen: Int) extends Module { + val io = new Bundle { + val dw = Bits(INPUT, SZ_DW) + val fn = Bits(INPUT, SZ_ALU_FN) + val in2 = UInt(INPUT, xLen) + val in1 = UInt(INPUT, xLen) + val out = UInt(OUTPUT, xLen) + val adder_out = UInt(OUTPUT, xLen) + } // ADD, SUB val sum = io.in1 + Mux(isSub(io.fn), -io.in2, io.in2) @@ -61,19 +59,26 @@ class ALU(implicit p: Parameters) extends Module { // SLT, SLTU val cmp = cmpInverted(io.fn) ^ Mux(cmpEq(io.fn), sum === UInt(0), - Mux(io.in1(63) === io.in2(63), sum(63), - Mux(cmpUnsigned(io.fn), io.in2(63), io.in1(63)))) + Mux(io.in1(xLen-1) === io.in2(xLen-1), sum(xLen-1), + Mux(cmpUnsigned(io.fn), io.in2(xLen-1), io.in1(xLen-1)))) // SLL, SRL, SRA - val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0)).toUInt - val shin_hi_32 = Mux(isSub(io.fn), Fill(32, io.in1(31)), UInt(0,32)) - val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) - val shin_r = Cat(shin_hi, io.in1(31,0)) + val full_shamt = io.in2(log2Up(xLen)-1,0) + + val (shamt, shin_r) = + if (xLen == 32) (full_shamt, io.in1) + else { + require(xLen == 64) + val shin_hi_32 = Fill(32, isSub(io.fn) && io.in1(31)) + val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) + val shamt = Cat(full_shamt(5) & (io.dw === DW_64), full_shamt(4,0)) + (shamt, Cat(shin_hi, io.in1(31,0))) + } val shin = Mux(io.fn === FN_SR || io.fn === FN_SRA, shin_r, Reverse(shin_r)) - val shout_r = (Cat(isSub(io.fn) & shin(63), shin).toSInt >> shamt)(63,0) + val shout_r = (Cat(isSub(io.fn) & shin(xLen-1), shin).toSInt >> shamt)(xLen-1,0) val shout_l = Reverse(shout_r) - val out64 = + val out = Mux(io.fn === FN_ADD || io.fn === FN_SUB, sum, Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, Mux(io.fn === FN_SL, shout_l, @@ -82,7 +87,10 @@ class ALU(implicit p: Parameters) extends Module { Mux(io.fn === FN_XOR, io.in1 ^ io.in2, /* all comparisons */ cmp)))))) - val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) - io.out := Cat(out_hi, out64(31,0)).toUInt io.adder_out := sum + io.out := out + if (xLen > 32) { + require(xLen == 64) + when (io.dw === DW_32) { io.out := Cat(Fill(32, out(31)), out(31,0)) } + } } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 53824d51..570f120b 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -239,7 +239,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { A2_IMM -> ex_imm, A2_FOUR -> SInt(4))) - val alu = Module(new ALU) + val alu = Module(new ALU(xLen)) alu.io.dw := ex_ctrl.alu_dw alu.io.fn := ex_ctrl.alu_fn alu.io.in2 := ex_op2.toUInt From 58b0a868341d67fe5a5f62fa96c57b194d2424e3 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 25 Nov 2015 14:04:28 -0800 Subject: [PATCH 5/7] some modifications to AccumulatorExample --- rocket/src/main/scala/rocc.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index ff059811..fc46f9ac 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -56,12 +56,12 @@ abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) { } class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) { - val regfile = Mem(UInt(width = xLen), n) - val busy = Reg(init=Vec(Bool(false), n)) + val regfile = Mem(n, UInt(width = xLen)) + val busy = Reg(init = Vec.fill(n){Bool(false)}) val cmd = Queue(io.cmd) val funct = cmd.bits.inst.funct - val addr = cmd.bits.inst.rs2(log2Up(n)-1,0) + val addr = cmd.bits.rs2(log2Up(n)-1,0) val doWrite = funct === UInt(0) val doRead = funct === UInt(1) val doLoad = funct === UInt(2) From 9256239206ed24e1841bbfd2bb60456b84492700 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 25 Nov 2015 16:02:27 -0800 Subject: [PATCH 6/7] implement support for multiple RoCC accelerators --- rocket/src/main/scala/rocc.scala | 170 +++++++++++++++++++++++++++++++ rocket/src/main/scala/tile.scala | 61 +++++++---- 2 files changed, 213 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index fc46f9ac..4952b406 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -128,3 +128,173 @@ class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) { io.dptw.req.valid := false io.pptw.req.valid := false } + +class TranslatorExample(implicit p: Parameters) extends RoCC()(p) { + val req_addr = Reg(UInt(width = coreMaxAddrBits)) + val req_rd = Reg(io.resp.bits.rd) + val req_offset = req_addr(pgIdxBits - 1, 0) + val req_vpn = req_addr(coreMaxAddrBits - 1, pgIdxBits) + val ppn = Reg(UInt(width = ppnBits)) + val error = Reg(Bool()) + + val s_idle :: s_ptw_req :: s_ptw_resp :: s_resp :: Nil = Enum(Bits(), 4) + val state = Reg(init = s_idle) + + io.cmd.ready := (state === s_idle) + + when (io.cmd.fire()) { + req_rd := io.cmd.bits.inst.rd + req_addr := io.cmd.bits.rs1 + state := s_ptw_req + } + + when (io.dptw.req.fire()) { state := s_ptw_resp } + + when (state === s_ptw_resp && io.dptw.resp.valid) { + error := io.dptw.resp.bits.error + ppn := io.dptw.resp.bits.pte.ppn + state := s_resp + } + + when (io.resp.fire()) { state := s_idle } + + io.dptw.req.valid := (state === s_ptw_req) + io.dptw.req.bits.addr := req_vpn + io.dptw.req.bits.store := Bool(false) + io.dptw.req.bits.fetch := Bool(false) + + io.resp.valid := (state === s_resp) + io.resp.bits.rd := req_rd + io.resp.bits.data := Mux(error, SInt(-1).toUInt, Cat(ppn, req_offset)) + + io.busy := (state =/= s_idle) + io.interrupt := Bool(false) + io.mem.req.valid := Bool(false) + io.dmem.head.acquire.valid := Bool(false) + io.dmem.head.grant.ready := Bool(false) + io.imem.acquire.valid := Bool(false) + io.imem.grant.ready := Bool(false) + io.iptw.req.valid := Bool(false) + io.pptw.req.valid := Bool(false) +} + +class CharacterCountExample(implicit p: Parameters) extends RoCC()(p) + with HasTileLinkParameters { + + private val blockOffset = tlBeatAddrBits + tlByteAddrBits + + val needle = Reg(UInt(width = 8)) + val addr = Reg(UInt(width = coreMaxAddrBits)) + val count = Reg(UInt(width = xLen)) + val resp_rd = Reg(io.resp.bits.rd) + + val addr_block = addr(coreMaxAddrBits - 1, blockOffset) + val offset = addr(blockOffset - 1, 0) + val next_addr = (addr_block + UInt(1)) << UInt(blockOffset) + + val s_idle :: s_acq :: s_gnt :: s_check :: s_resp :: Nil = Enum(Bits(), 5) + val state = Reg(init = s_idle) + + val gnt = io.dmem.head.grant.bits + val recv_data = Reg(UInt(width = tlDataBits)) + val recv_beat = Reg(UInt(width = tlBeatAddrBits)) + + val data_bytes = Vec.tabulate(tlDataBytes) { i => recv_data(8 * (i + 1) - 1, 8 * i) } + val zero_match = data_bytes.map(_ === UInt(0)) + val needle_match = data_bytes.map(_ === needle) + val first_zero = PriorityEncoder(zero_match) + + val chars_found = PopCount(needle_match.zipWithIndex.map { + case (matches, i) => + val idx = Cat(recv_beat, UInt(i, tlByteAddrBits)) + matches && idx >= offset && UInt(i) <= first_zero + }) + val zero_found = zero_match.reduce(_ || _) + val finished = Reg(Bool()) + + io.cmd.ready := (state === s_idle) + io.resp.valid := (state === s_resp) + io.resp.bits.rd := resp_rd + io.resp.bits.data := count + io.dmem.head.acquire.valid := (state === s_acq) + io.dmem.head.acquire.bits := GetBlock(addr_block = addr_block) + io.dmem.head.grant.ready := (state === s_gnt) + + when (io.cmd.fire()) { + addr := io.cmd.bits.rs1 + needle := io.cmd.bits.rs2 + resp_rd := io.cmd.bits.inst.rd + count := UInt(0) + finished := Bool(false) + state := s_acq + } + + when (io.dmem.head.acquire.fire()) { state := s_gnt } + + when (io.dmem.head.grant.fire()) { + recv_beat := gnt.addr_beat + recv_data := gnt.data + state := s_check + } + + when (state === s_check) { + when (!finished) { + count := count + chars_found + } + when (zero_found) { finished := Bool(true) } + when (recv_beat === UInt(tlDataBeats - 1)) { + addr := next_addr + state := Mux(zero_found || finished, s_resp, s_acq) + } .otherwise { + state := s_gnt + } + } + + when (io.resp.fire()) { state := s_idle } + + io.busy := (state =/= s_idle) + io.interrupt := Bool(false) + io.mem.req.valid := Bool(false) + io.imem.acquire.valid := Bool(false) + io.imem.grant.ready := Bool(false) + io.dptw.req.valid := Bool(false) + io.iptw.req.valid := Bool(false) + io.pptw.req.valid := Bool(false) +} + +class OpcodeSet(val opcodes: Seq[UInt]) { + def |(set: OpcodeSet) = + new OpcodeSet(this.opcodes ++ set.opcodes) + + def matches(oc: UInt) = opcodes.map(_ === oc).reduce(_ || _) +} + +object OpcodeSet { + val custom0 = new OpcodeSet(Seq(Bits("b0001011"))) + val custom1 = new OpcodeSet(Seq(Bits("b0101011"))) + val custom2 = new OpcodeSet(Seq(Bits("b1011011"))) + val custom3 = new OpcodeSet(Seq(Bits("b1111011"))) + val all = custom0 | custom1 | custom2 | custom3 +} + +class RoccCommandRouter(opcodes: Seq[OpcodeSet])(implicit p: Parameters) + extends CoreModule()(p) { + val io = new Bundle { + val in = Decoupled(new RoCCCommand).flip + val out = Vec(opcodes.size, Decoupled(new RoCCCommand)) + val busy = Bool(OUTPUT) + } + + val cmd = Queue(io.in) + val cmdReadys = io.out.zip(opcodes).map { case (out, opcode) => + val me = opcode.matches(cmd.bits.inst.opcode) + out.valid := cmd.valid && me + out.bits := cmd.bits + out.ready && me + } + cmd.ready := cmdReadys.reduce(_ || _) + io.busy := cmd.valid + + assert(PopCount(cmdReadys) <= UInt(1), + "Custom opcode matched for more than one accelerator") +} diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index c5e3e851..7c838475 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -8,15 +8,21 @@ import Util._ import cde.{Parameters, Field} case object CoreName extends Field[String] -case object BuildRoCC extends Field[Option[Parameters => RoCC]] +case object BuildRoCC extends Field[Seq[Parameters => RoCC]] +case object RoccOpcodes extends Field[Seq[OpcodeSet]] +case object RoccAcceleratorMemChannels extends Field[Seq[Int]] abstract class Tile(resetSignal: Bool = null) (implicit p: Parameters) extends Module(_reset = resetSignal) { - val usingRocc = !p(BuildRoCC).isEmpty - val nDCachePorts = 2 + (if(!usingRocc) 0 else 1) - val nPTWPorts = 2 + (if(!usingRocc) 0 else 3) + val buildRocc = p(BuildRoCC) + val roccOpcodes = p(RoccOpcodes) + val roccMemChannels = p(RoccAcceleratorMemChannels) + val usingRocc = !buildRocc.isEmpty + val nRocc = buildRocc.size + val nDCachePorts = 2 + nRocc + val nPTWPorts = 2 + 3 * nRocc val nCachedTileLinkPorts = 1 - val nUncachedTileLinkPorts = 1 + (if(!usingRocc) 0 else p(RoccNMemChannels)) + val nUncachedTileLinkPorts = 1 + p(RoccNMemChannels) val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) val io = new Bundle { val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO) @@ -53,18 +59,37 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( io.cached.head <> dcache.io.mem // If so specified, build an RoCC module and wire it to core + TileLink ports, // otherwise just hookup the icache - io.uncached <> p(BuildRoCC).map { buildItHere => - val rocc = buildItHere(p) - val iMemArb = Module(new ClientTileLinkIOArbiter(2)) - val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) - core.io.rocc <> rocc.io - dcIF.io.requestor <> rocc.io.mem - dcArb.io.requestor(2) <> dcIF.io.cache + io.uncached <> (if (usingRocc) { + val iMemArb = Module(new ClientTileLinkIOArbiter(1 + nRocc)) iMemArb.io.in(0) <> icache.io.mem - iMemArb.io.in(1) <> rocc.io.imem - ptw.io.requestor(2) <> rocc.io.iptw - ptw.io.requestor(3) <> rocc.io.dptw - ptw.io.requestor(4) <> rocc.io.pptw - rocc.io.dmem :+ iMemArb.io.out - }.getOrElse(List(icache.io.mem)) + + val respArb = Module(new RRArbiter(new RoCCResponse, nRocc)) + core.io.rocc.resp <> respArb.io.out + + val cmdRouter = Module(new RoccCommandRouter(roccOpcodes)) + cmdRouter.io.in <> core.io.rocc.cmd + + val roccs = buildRocc.zip(roccMemChannels).zipWithIndex.map { + case ((buildItHere, nchannels), i) => + val accelParams = p.alterPartial({ case RoccNMemChannels => nchannels}) + val rocc = buildItHere(accelParams) + val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) + rocc.io.cmd <> cmdRouter.io.out(i) + rocc.io.s := core.io.rocc.s + rocc.io.exception := core.io.rocc.exception + dcIF.io.requestor <> rocc.io.mem + dcArb.io.requestor(2 + i) <> dcIF.io.cache + iMemArb.io.in(1 + i) <> rocc.io.imem + ptw.io.requestor(2 + 3 * i) <> rocc.io.iptw + ptw.io.requestor(3 + 3 * i) <> rocc.io.dptw + ptw.io.requestor(4 + 3 * i) <> rocc.io.pptw + rocc + } + + core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _) + core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _) + respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) + + roccs.flatMap(_.io.dmem) :+ iMemArb.io.out + } else { Seq(icache.io.mem) }) } From e80340198a621c40bd5007ad71229b9df8d967c4 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Mon, 30 Nov 2015 17:35:33 -0800 Subject: [PATCH 7/7] use implicit parameters for ALU --- rocket/src/main/scala/dpath_alu.scala | 2 +- rocket/src/main/scala/rocket.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 8aab0cfd..e26bff8a 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -43,7 +43,7 @@ object ALU } import ALU._ -class ALU(xLen: Int) extends Module { +class ALU(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { val dw = Bits(INPUT, SZ_DW) val fn = Bits(INPUT, SZ_ALU_FN) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 570f120b..53824d51 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -239,7 +239,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { A2_IMM -> ex_imm, A2_FOUR -> SInt(4))) - val alu = Module(new ALU(xLen)) + val alu = Module(new ALU) alu.io.dw := ex_ctrl.alu_dw alu.io.fn := ex_ctrl.alu_fn alu.io.in2 := ex_op2.toUInt