From 058396aefe4103046406be63e1a4c0178c4d13b3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 29 Jul 2016 16:36:07 -0700 Subject: [PATCH] [rocket] Implement RVC --- riscv-tools | 2 +- rocket/src/main/scala/btb.scala | 2 +- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/frontend.scala | 49 ++++---- rocket/src/main/scala/ibuf.scala | 141 +++++++++++++++++++++++ rocket/src/main/scala/idecode.scala | 2 +- rocket/src/main/scala/rocket.scala | 141 +++++++++++++---------- rocket/src/main/scala/rvc.scala | 165 +++++++++++++++++++++++++++ src/main/scala/Configs.scala | 9 +- src/main/scala/Testing.scala | 10 +- 10 files changed, 434 insertions(+), 89 deletions(-) create mode 100644 rocket/src/main/scala/ibuf.scala create mode 100644 rocket/src/main/scala/rvc.scala diff --git a/riscv-tools b/riscv-tools index 7219be43..65da94f8 160000 --- a/riscv-tools +++ b/riscv-tools @@ -1 +1 @@ -Subproject commit 7219be435a89277603e566e806ae8540c7f9a917 +Subproject commit 65da94f84a2ba5a61a8bcf3ebdd8ca57f6d899ca diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 01e5dead..981475e1 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -233,7 +233,7 @@ class BTB(implicit p: Parameters) extends BtbModule { io.resp.bits.taken := true io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts) << log2Up(coreInstBytes)) io.resp.bits.entry := OHToUInt(hits) - io.resp.bits.bridx := Mux1H(hitsVec, brIdx) + io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(hitsVec, brIdx) else UInt(0)) io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1)) if (nBHT > 0) { diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 74386c12..f8536da8 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -33,7 +33,7 @@ trait ScalarOpConstants { val A2_X = BitPat("b??") val A2_ZERO = UInt(0, 2) - val A2_FOUR = UInt(1, 2) + val A2_SIZE = UInt(1, 2) val A2_RS2 = UInt(2, 2) val A2_IMM = UInt(3, 2) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 77e8f6e6..17080a98 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -11,8 +11,9 @@ class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) { } class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { + val btb = Valid(new BTBResp) val pc = UInt(width = vaddrBitsExtended) // ID stage PC - val data = Vec(fetchWidth, Bits(width = coreInstBits)) + val data = UInt(width = fetchWidth * coreInstBits) val mask = Bits(width = fetchWidth) val xcpt_if = Bool() val replay = Bool() @@ -21,7 +22,6 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { val req = Valid(new FrontendReq) val resp = Decoupled(new FrontendResp).flip - val btb_resp = Valid(new BTBResp).flip val btb_update = Valid(new BTBUpdate) val bht_update = Valid(new BHTUpdate) val ras_update = Valid(new RASUpdate) @@ -50,28 +50,37 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val s2_btb_resp_bits = Reg(new BTBResp) val s2_xcpt_if = Reg(init=Bool(false)) val s2_speculative = Reg(init=Bool(false)) + val s2_cacheable = Reg(init=Bool(false)) val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth) + val ntpc_same_block = (ntpc & rowBytes) === (s1_pc & rowBytes) val predicted_npc = Wire(init = ntpc) + val predicted_taken = Wire(init = Bool(false)) val icmiss = s2_valid && !icache.io.resp.valid val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt - val s0_same_block = Wire(init = !icmiss && !io.cpu.req.valid && ((ntpc & rowBytes) === (s1_pc & rowBytes))) + val s0_same_block = !predicted_taken && !icmiss && !io.cpu.req.valid && ntpc_same_block val stall = io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { s1_same_block := s0_same_block && !tlb.io.resp.miss - s1_pc_ := npc - s1_speculative := Mux(icmiss, s2_speculative, true) + s1_pc_ := io.cpu.npc + // consider RVC fetches across blocks to be non-speculative if the first + // part was non-speculative + val s0_speculative = + if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken + else Bool(true) + s1_speculative := Mux(icmiss, s2_speculative, s0_speculative) s2_valid := !icmiss when (!icmiss) { s2_pc := s1_pc - s2_speculative := s1_speculative && !tlb.io.resp.cacheable + s2_speculative := s1_speculative + s2_cacheable := tlb.io.resp.cacheable s2_xcpt_if := tlb.io.resp.xcpt_if } } when (io.cpu.req.valid) { s1_same_block := Bool(false) - s1_pc_ := io.cpu.req.bits.pc + s1_pc_ := io.cpu.npc s1_speculative := io.cpu.req.bits.speculative s2_valid := Bool(false) } @@ -79,7 +88,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa if (p(BtbKey).nEntries > 0) { val btb = Module(new BTB) btb.io.req.valid := false - btb.io.req.bits.addr := s1_pc + btb.io.req.bits.addr := s1_pc_ btb.io.btb_update := io.cpu.btb_update btb.io.bht_update := io.cpu.bht_update btb.io.ras_update := io.cpu.ras_update @@ -88,9 +97,9 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa s2_btb_resp_valid := btb.io.resp.valid s2_btb_resp_bits := btb.io.resp.bits } - when (btb.io.resp.bits.taken) { + when (btb.io.resp.valid && btb.io.resp.bits.taken) { predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended) - s0_same_block := Bool(false) + predicted_taken := Bool(true) } } @@ -107,24 +116,18 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa icache.io.invalidate := io.cpu.flush_icache icache.io.s1_ppn := tlb.io.resp.ppn icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb - icache.io.s2_kill := s2_speculative + icache.io.s2_kill := s2_speculative && !s2_cacheable icache.io.resp.ready := !stall && !s1_same_block - io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || s2_speculative || s2_xcpt_if) + io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill || s2_xcpt_if) io.cpu.resp.bits.pc := s2_pc io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) - require(fetchWidth * coreInstBytes <= rowBytes) - val fetch_data = icache.io.resp.bits.datablock >> (s2_pc.extract(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) - - for (i <- 0 until fetchWidth) { - io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) - } - + require(fetchWidth * coreInstBytes <= rowBytes && isPow2(fetchWidth)) + io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc.extract(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Up(fetchWidth)+log2Up(coreInstBytes)-1, log2Up(coreInstBytes)) io.cpu.resp.bits.xcpt_if := s2_xcpt_if - io.cpu.resp.bits.replay := s2_speculative && !icache.io.resp.valid && !s2_xcpt_if - - io.cpu.btb_resp.valid := s2_btb_resp_valid - io.cpu.btb_resp.bits := s2_btb_resp_bits + io.cpu.resp.bits.replay := icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt_if + io.cpu.resp.bits.btb.valid := s2_btb_resp_valid + io.cpu.resp.bits.btb.bits := s2_btb_resp_bits } diff --git a/rocket/src/main/scala/ibuf.scala b/rocket/src/main/scala/ibuf.scala new file mode 100644 index 00000000..1b94e80f --- /dev/null +++ b/rocket/src/main/scala/ibuf.scala @@ -0,0 +1,141 @@ +// See LICENSE for license details. + +package rocket + +import Chisel._ +import Util._ +import cde.{Parameters, Field} +import junctions._ + +class Instruction(implicit val p: Parameters) extends ParameterizedBundle with HasCoreParameters { + val pf0 = Bool() // page fault on first half of instruction + val pf1 = Bool() // page fault on second half of instruction + val replay = Bool() + val btb_hit = Bool() + val rvc = Bool() + val inst = new ExpandedInstruction + require(coreInstBits == (if (usingCompressed) 16 else 32)) +} + +class IBuf(implicit p: Parameters) extends CoreModule { + val io = new Bundle { + val imem = Decoupled(new FrontendResp).flip + val kill = Bool(INPUT) + val pc = UInt(width = vaddrBitsExtended) + val btb_resp = new BTBResp().asOutput + val inst = Vec(retireWidth, Decoupled(new Instruction)) + } + + // This module is meant to be more general, but it's not there yet + require(fetchWidth == (if (usingCompressed) 2 else 1)) + + val n = fetchWidth - 1 + val nBufValid = if (n == 0) UInt(0) else Reg(init=UInt(0, log2Ceil(fetchWidth))) + val buf = Reg(io.imem.bits) + val ibufBTBHit = Reg(Bool()) + val ibufBTBResp = Reg(new BTBResp) + val pcWordMask = UInt(coreInstBytes*fetchWidth-1, vaddrBitsExtended) + + val pcWordBits = io.imem.bits.pc.extract(log2Ceil(fetchWidth*coreInstBytes)-1, log2Ceil(coreInstBytes)) + val nReady = Wire(init = UInt(0, log2Ceil(fetchWidth+1))) + val nIC = Mux(io.imem.bits.btb.valid && io.imem.bits.btb.bits.taken, io.imem.bits.btb.bits.bridx +& 1, UInt(fetchWidth)) - pcWordBits + val nICReady = nReady - nBufValid + val nValid = Mux(io.imem.valid, nIC, UInt(0)) + nBufValid + io.imem.ready := nReady >= nBufValid && (nICReady >= nIC || n >= nIC - nICReady) + + if (n > 0) { + nBufValid := Mux(nReady >= nBufValid, UInt(0), nBufValid - nReady) + if (n > 1) when (nReady > 0 && nReady < nBufValid) { + val shiftedBuf = shiftInsnRight(buf.data(n*coreInstBits-1, coreInstBits), (nReady-1)(log2Ceil(n-1)-1,0)) + buf.data := Cat(buf.data(n*coreInstBits-1, (n-1)*coreInstBits), shiftedBuf((n-1)*coreInstBits-1, 0)) + buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask + ibufBTBResp.bridx := ibufBTBResp.bridx - nReady + } + when (io.imem.valid && nReady >= nBufValid && nICReady < nIC && n >= nIC - nICReady) { + val shamt = pcWordBits + nICReady + nBufValid := nIC - nICReady + buf := io.imem.bits + buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n*coreInstBits-1,0) + buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask + ibufBTBHit := io.imem.bits.btb.valid + when (io.imem.bits.btb.valid) { + ibufBTBResp := io.imem.bits.btb.bits + ibufBTBResp.bridx := io.imem.bits.btb.bits.bridx + nICReady + } + } + when (io.kill) { + nBufValid := 0 + } + } + + val icShiftAmt = (fetchWidth + nBufValid - pcWordBits)(log2Ceil(fetchWidth), 0) + val icData = shiftInsnLeft(Cat(io.imem.bits.data, Fill(fetchWidth, io.imem.bits.data(coreInstBits-1, 0))), icShiftAmt) + .extract(3*fetchWidth*coreInstBits-1, 2*fetchWidth*coreInstBits) + val icMask = (~UInt(0, fetchWidth*coreInstBits) << (nBufValid << log2Ceil(coreInstBits)))(fetchWidth*coreInstBits-1,0) + val inst = icData & icMask | buf.data & ~icMask + + val valid = (UIntToOH(nValid) - 1)(fetchWidth-1, 0) + val bufMask = UIntToOH(nBufValid) - 1 + val xcpt_if = valid & (Mux(buf.xcpt_if, bufMask, UInt(0)) | Mux(io.imem.bits.xcpt_if, ~bufMask, UInt(0))) + val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0))) + val boundaries = findInsnBoundaries(inst) + val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0)) + val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0)) + val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask + + io.btb_resp := Mux((ibufBTBHitMask & bufMask).orR, ibufBTBResp, io.imem.bits.btb.bits) + io.pc := Mux(nBufValid > 0, buf.pc, io.imem.bits.pc) + expand(0, 0, inst) + + def expand(i: Int, j: UInt, curInst: UInt): Unit = if (i < retireWidth) { + val exp = Module(new RVCExpander) + exp.io.in := curInst + io.inst(i).bits.inst := exp.io.out + + if (usingCompressed) { + val replay = ic_replay(j) || (!exp.io.rvc && (btbHitMask(j) || ic_replay(j+1))) + io.inst(i).valid := valid(j) && (exp.io.rvc || valid(j+1) || xcpt_if(j+1) || replay) + io.inst(i).bits.pf0 := xcpt_if(j) + io.inst(i).bits.pf1 := !exp.io.rvc && xcpt_if(j+1) + io.inst(i).bits.replay := replay + io.inst(i).bits.btb_hit := btbHitMask(j) || (!exp.io.rvc && btbHitMask(j+1)) + io.inst(i).bits.rvc := exp.io.rvc + + when (io.inst(i).fire()) { nReady := Mux(exp.io.rvc, j+1, j+2) } + + expand(i+1, Mux(exp.io.rvc, j+1, j+2), Mux(exp.io.rvc, curInst >> 16, curInst >> 32)) + } else { + when (io.inst(i).ready) { nReady := i+1 } + io.inst(i).valid := valid(i) + io.inst(i).bits.pf0 := xcpt_if(i) + io.inst(i).bits.pf1 := false + io.inst(i).bits.replay := ic_replay(i) + io.inst(i).bits.rvc := false + io.inst(i).bits.btb_hit := btbHitMask(i) + + expand(i+1, null, curInst >> 32) + } + } + + def shiftInsnLeft(in: UInt, dist: UInt) = { + val r = in.getWidth/coreInstBits + require(in.getWidth % coreInstBits == 0) + val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r-1)*coreInstBits), in) + data << (dist << log2Ceil(coreInstBits)) + } + + def shiftInsnRight(in: UInt, dist: UInt) = { + val r = in.getWidth/coreInstBits + require(in.getWidth % coreInstBits == 0) + val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r-1)*coreInstBits), in) + data >> (dist << log2Ceil(coreInstBits)) + } + + def findInsnBoundaries(insns: UInt): Seq[Bool] = { + def isRVC(insn: UInt) = if (usingCompressed) insn(1,0) =/= 3 else Bool(false) + val end = collection.mutable.ArrayBuffer(isRVC(insns)) + for (i <- 1 until insns.getWidth/16) + end += !end.head || isRVC(insns(i*16+1,i*16)) + end + } +} diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index 2168922c..14ac3553 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -73,7 +73,7 @@ class IDecode(implicit val p: Parameters) extends DecodeConstants BGE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), BGEU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - JAL-> List(Y,N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + JAL-> List(Y,N,N,N,Y,N,N,N,A2_SIZE,A1_PC, IMM_UJ,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), JALR-> List(Y,N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), AUIPC-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 7756ab18..a1cbc082 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -19,6 +19,7 @@ case object UseVM extends Field[Boolean] case object UseUser extends Field[Boolean] case object UseDebug extends Field[Boolean] case object UseAtomics extends Field[Boolean] +case object UseCompressed extends Field[Boolean] case object UsePerfCounters extends Field[Boolean] case object FastLoadWord extends Field[Boolean] case object FastLoadByte extends Field[Boolean] @@ -42,6 +43,7 @@ trait HasCoreParameters extends HasAddrMapParameters { val usingDebug = p(UseDebug) val usingFPU = p(UseFPU) val usingAtomics = p(UseAtomics) + val usingCompressed = p(UseCompressed) val usingFDivSqrt = p(FDivSqrt) val usingRoCC = !p(BuildRoCC).isEmpty val mulUnroll = p(MulUnroll) @@ -65,6 +67,10 @@ trait HasCoreParameters extends HasAddrMapParameters { val nRoccCsrs = p(RoccNCSRs) val nCores = p(NTiles) + // fetchWidth doubled, but coreInstBytes halved, for RVC + require(fetchWidth == retireWidth * (4 / coreInstBytes)) + require(retireWidth == 1) + // Print out log of committed instructions and their writeback values. // Requires post-processing due to out-of-order writebacks. val enableCommitLog = false @@ -75,7 +81,7 @@ trait HasCoreParameters extends HasAddrMapParameters { case 64 => 50 } - require(paddrBits < maxPAddrBits) + require(paddrBits <= maxPAddrBits) require(!fastLoadByte || fastLoadWord) } @@ -153,8 +159,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val ex_reg_xcpt_interrupt = Reg(Bool()) val ex_reg_valid = Reg(Bool()) + val ex_reg_rvc = Reg(Bool()) val ex_reg_btb_hit = Reg(Bool()) - val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits) + val ex_reg_btb_resp = Reg(new BTBResp) val ex_reg_xcpt = Reg(Bool()) val ex_reg_flush_pipe = Reg(Bool()) val ex_reg_load_use = Reg(Bool()) @@ -165,8 +172,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val mem_reg_xcpt_interrupt = Reg(Bool()) val mem_reg_valid = Reg(Bool()) + val mem_reg_rvc = Reg(Bool()) val mem_reg_btb_hit = Reg(Bool()) - val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits) + val mem_reg_btb_resp = Reg(new BTBResp) val mem_reg_xcpt = Reg(Bool()) val mem_reg_replay = Reg(Bool()) val mem_reg_flush_pipe = Reg(Bool()) @@ -182,7 +190,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val wb_reg_valid = Reg(Bool()) val wb_reg_xcpt = Reg(Bool()) - val wb_reg_mem_xcpt = Reg(Bool()) val wb_reg_replay = Reg(Bool()) val wb_reg_cause = Reg(UInt()) val wb_reg_pc = Reg(UInt()) @@ -195,13 +202,17 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val take_pc = take_pc_mem_wb // decode stage - val id_pc = io.imem.resp.bits.pc - val id_inst = io.imem.resp.bits.data(0).toBits; require(fetchWidth == 1) - val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table) - val id_raddr3 = id_inst(31,27) - val id_raddr2 = id_inst(24,20) - val id_raddr1 = id_inst(19,15) - val id_waddr = id_inst(11,7) + val ibuf = Module(new IBuf) + val id_expanded_inst = ibuf.io.inst.map(_.bits.inst) + val id_inst = id_expanded_inst.map(_.bits) + ibuf.io.imem <> io.imem.resp + ibuf.io.kill := take_pc + + val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst(0), decode_table) + val id_raddr3 = id_expanded_inst(0).rs3 + val id_raddr2 = id_expanded_inst(0).rs2 + val id_raddr1 = id_expanded_inst(0).rs1 + val id_waddr = id_expanded_inst(0).rd val id_load_use = Wire(Bool()) val id_reg_fence = Reg(init=Bool(false)) val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2) @@ -215,7 +226,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val id_system_insn = id_ctrl.csr === CSR.I val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0) val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr) - val id_csr_addr = id_inst(31,20) + val id_csr_addr = id_inst(0)(31,20) // this is overly conservative val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*) @@ -225,8 +236,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { id_ctrl.fp && !csr.io.status.fs.orR || id_ctrl.rocc && !csr.io.status.xs.orR // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) - val id_amo_aq = id_inst(26) - val id_amo_rl = id_inst(25) + val id_amo_aq = id_inst(0)(26) + val id_amo_rl = id_inst(0)(25) val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid val id_rocc_busy = Bool(usingRoCC) && @@ -239,14 +250,15 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val bpu = Module(new BreakpointUnit) bpu.io.status := csr.io.status bpu.io.bp := csr.io.bp - bpu.io.pc := id_pc + bpu.io.pc := ibuf.io.pc bpu.io.ea := mem_reg_wdata + val id_xcpt_if = ibuf.io.inst(0).bits.pf0 || ibuf.io.inst(0).bits.pf1 val (id_xcpt, id_cause) = checkExceptions(List( - (csr.io.interrupt, csr.io.interrupt_cause), - (bpu.io.xcpt_if, UInt(Causes.breakpoint)), - (io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)), - (id_illegal_insn, UInt(Causes.illegal_instruction)))) + (csr.io.interrupt, csr.io.interrupt_cause), + (bpu.io.xcpt_if, UInt(Causes.breakpoint)), + (id_xcpt_if, UInt(Causes.fault_fetch)), + (id_illegal_insn, UInt(Causes.illegal_instruction)))) val dcache_bypass_data = if (fastLoadByte) io.dmem.resp.bits.data @@ -278,7 +290,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val ex_op2 = MuxLookup(ex_ctrl.sel_alu2, SInt(0), Seq( A2_RS2 -> ex_rs(1).toSInt, A2_IMM -> ex_imm, - A2_FOUR -> SInt(4))) + A2_SIZE -> Mux(ex_reg_rvc, SInt(2), SInt(4)))) val alu = Module(new ALU) alu.io.dw := ex_ctrl.alu_dw @@ -299,16 +311,26 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { div.io.req.bits.tag := ex_waddr ex_reg_valid := !ctrl_killd - ex_reg_replay := !take_pc && io.imem.resp.valid && io.imem.resp.bits.replay + ex_reg_replay := !take_pc && ibuf.io.inst(0).valid && ibuf.io.inst(0).bits.replay ex_reg_xcpt := !ctrl_killd && id_xcpt - ex_reg_xcpt_interrupt := !take_pc && io.imem.resp.valid && csr.io.interrupt + ex_reg_xcpt_interrupt := !take_pc && ibuf.io.inst(0).valid && csr.io.interrupt when (id_xcpt) { ex_reg_cause := id_cause } + ex_reg_btb_hit := ibuf.io.inst(0).bits.btb_hit + when (ibuf.io.inst(0).bits.btb_hit) { ex_reg_btb_resp := ibuf.io.btb_resp } when (!ctrl_killd) { ex_ctrl := id_ctrl + ex_reg_rvc := ibuf.io.inst(0).bits.rvc ex_ctrl.csr := id_csr - ex_reg_btb_hit := io.imem.btb_resp.valid - when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits } + when (id_xcpt) { // pass PC down ALU writeback pipeline for badaddr + ex_ctrl.alu_fn := ALU.FN_ADD + ex_ctrl.sel_alu1 := A1_PC + ex_ctrl.sel_alu2 := A2_ZERO + when (!bpu.io.xcpt_if && !ibuf.io.inst(0).bits.pf0 && ibuf.io.inst(0).bits.pf1) { // PC+2 + ex_ctrl.sel_alu2 := A2_SIZE + ex_reg_rvc := true + } + } ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep ex_reg_load_use := id_load_use @@ -328,9 +350,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } } } - when (!ctrl_killd || csr.io.interrupt || io.imem.resp.bits.replay) { - ex_reg_inst := id_inst - ex_reg_pc := id_pc + when (!ctrl_killd || csr.io.interrupt || ibuf.io.inst(0).bits.replay) { + ex_reg_inst := id_inst(0) + ex_reg_pc := ibuf.io.pc } // replay inst in ex stage? @@ -352,18 +374,18 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val mem_br_taken = mem_reg_wdata(0) val mem_br_target = mem_reg_pc.toSInt + Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst), - Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), SInt(4))) - val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt + Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), + Mux(mem_reg_rvc, SInt(2), SInt(4)))) val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).toSInt, mem_br_target) & SInt(-2)).toUInt - val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(io.imem.resp.valid, mem_npc =/= id_pc, Bool(true))) - val mem_npc_misaligned = mem_npc(1) + val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(ibuf.io.inst(0).valid, mem_npc =/= ibuf.io.pc, Bool(true))) + val mem_npc_misaligned = if (usingCompressed) Bool(false) else mem_npc(1) + val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.toSInt).toUInt val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal val mem_misprediction = if (p(BtbKey).nEntries == 0) mem_cfi_taken else mem_wrong_npc - val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) - take_pc_mem := want_take_pc_mem && !mem_npc_misaligned + take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) mem_reg_valid := !ctrl_killx mem_reg_replay := !take_pc_mem_wb && replay_ex @@ -373,6 +395,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { when (ex_pc_valid) { mem_ctrl := ex_ctrl + mem_reg_rvc := ex_reg_rvc mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd) mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd) mem_reg_btb_hit := ex_reg_btb_hit @@ -389,13 +412,13 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } val (mem_new_xcpt, mem_new_cause) = checkExceptions(List( - (mem_reg_load && bpu.io.xcpt_ld, UInt(Causes.breakpoint)), - (mem_reg_store && bpu.io.xcpt_st, UInt(Causes.breakpoint)), - (want_take_pc_mem && mem_npc_misaligned, UInt(Causes.misaligned_fetch)), - (mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)), - (mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)), - (mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)), - (mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load)))) + (mem_reg_load && bpu.io.xcpt_ld, UInt(Causes.breakpoint)), + (mem_reg_store && bpu.io.xcpt_st, UInt(Causes.breakpoint)), + (mem_npc_misaligned, UInt(Causes.misaligned_fetch)), + (mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)), + (mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)), + (mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)), + (mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load)))) val (mem_xcpt, mem_cause) = checkExceptions(List( (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), @@ -412,11 +435,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { wb_reg_valid := !ctrl_killm wb_reg_replay := replay_mem && !take_pc_wb wb_reg_xcpt := mem_xcpt && !take_pc_wb - wb_reg_mem_xcpt := mem_reg_valid && mem_new_xcpt && !(mem_reg_xcpt_interrupt || mem_reg_xcpt) when (mem_xcpt) { wb_reg_cause := mem_cause } when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) { wb_ctrl := mem_ctrl - wb_reg_wdata := Mux(mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata) + wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata) when (mem_ctrl.rocc) { wb_reg_rs2 := mem_reg_rs2 } @@ -434,7 +456,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool - val dmem_resp_waddr = io.dmem.resp.bits.tag >> 1 + val dmem_resp_waddr = io.dmem.resp.bits.tag(5, 1) val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data val dmem_resp_replay = dmem_resp_valid && io.dmem.resp.bits.replay @@ -479,7 +501,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { io.rocc.csr <> csr.io.rocc.csr csr.io.rocc.interrupt <> io.rocc.interrupt csr.io.pc := wb_reg_pc - csr.io.badaddr := Mux(wb_reg_mem_xcpt, encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata), wb_reg_pc) + csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata) io.ptw.ptbr := csr.io.ptbr io.ptw.invalidate := csr.io.fatc io.ptw.status := csr.io.status @@ -495,7 +517,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { (io.fpu.dec.ren3, id_raddr3), (io.fpu.dec.wen, id_waddr)) - val sboard = new Scoreboard(32) + val sboard = new Scoreboard(32, true) sboard.clear(ll_wen, ll_waddr) val id_sboard_hazard = checkHazards(hazard_targets, sboard.read _) sboard.set(wb_set_sboard && wb_wen, wb_waddr) @@ -542,7 +564,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy id_do_fence || csr.io.csr_stall - ctrl_killd := !io.imem.resp.valid || io.imem.resp.bits.replay || take_pc || ctrl_stalld || csr.io.interrupt + ctrl_killd := !ibuf.io.inst(0).valid || ibuf.io.inst(0).bits.replay || take_pc || ctrl_stalld || csr.io.interrupt io.imem.req.valid := take_pc io.imem.req.bits.speculative := !take_pc_wb @@ -552,33 +574,35 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { mem_npc)).toUInt // mispredicted branch io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack io.imem.flush_tlb := csr.io.fatc - io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt || take_pc_mem - io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && mem_cfi_taken && !take_pc_wb + ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt + + io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && mem_wrong_npc) + io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00??1") - io.imem.btb_update.bits.pc := mem_reg_pc io.imem.btb_update.bits.target := io.imem.req.bits.pc - io.imem.btb_update.bits.br_pc := mem_reg_pc + io.imem.btb_update.bits.br_pc := (if (usingCompressed) mem_reg_pc + Mux(mem_reg_rvc, UInt(0), UInt(2)) else mem_reg_pc) + io.imem.btb_update.bits.pc := ~(~io.imem.btb_update.bits.br_pc | (coreInstBytes*fetchWidth-1)) io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp - io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb - io.imem.bht_update.bits.pc := mem_reg_pc + io.imem.bht_update.valid := mem_reg_valid && !take_pc_wb && mem_ctrl.branch + io.imem.bht_update.bits.pc := io.imem.btb_update.bits.pc io.imem.bht_update.bits.taken := mem_br_taken io.imem.bht_update.bits.mispredict := mem_wrong_npc io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction - io.imem.ras_update.valid := mem_reg_valid && io.imem.btb_update.bits.isJump && !mem_npc_misaligned && !take_pc_wb + io.imem.ras_update.valid := mem_reg_valid && !take_pc_wb io.imem.ras_update.bits.returnAddr := mem_int_wdata - io.imem.ras_update.bits.isCall := mem_ctrl.wxd && mem_waddr(0) + io.imem.ras_update.bits.isCall := io.imem.btb_update.bits.isJump && mem_waddr(0) io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn io.imem.ras_update.bits.prediction := io.imem.btb_update.bits.prediction io.fpu.valid := !ctrl_killd && id_ctrl.fp io.fpu.killx := ctrl_killx io.fpu.killm := killm_common - io.fpu.inst := id_inst + io.fpu.inst := id_inst(0) io.fpu.fromint_data := ex_rs(0) io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu io.fpu.dmem_resp_data := io.dmem.resp.bits.data_word_bypass @@ -659,21 +683,22 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { Cat(msb, ea(vaddrBits-1,0)) } - class Scoreboard(n: Int) + class Scoreboard(n: Int, zero: Boolean = false) { def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr)) def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr)) def read(addr: UInt): Bool = r(addr) def readBypassed(addr: UInt): Bool = _next(addr) - private val r = Reg(init=Bits(0, n)) + private val _r = Reg(init=Bits(0, n)) + private val r = if (zero) (_r >> 1 << 1) else _r private var _next = r private var ens = Bool(false) private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0)) private def update(en: Bool, update: UInt) = { _next = update ens = ens || en - when (ens) { r := _next } + when (ens) { _r := _next } } } } diff --git a/rocket/src/main/scala/rvc.scala b/rocket/src/main/scala/rvc.scala new file mode 100644 index 00000000..b4934408 --- /dev/null +++ b/rocket/src/main/scala/rvc.scala @@ -0,0 +1,165 @@ +package rocket + +import Chisel._ +import Chisel.ImplicitConversions._ +import Util._ +import cde.Parameters + +class ExpandedInstruction extends Bundle { + val bits = UInt(width = 32) + val rd = UInt(width = 5) + val rs1 = UInt(width = 5) + val rs2 = UInt(width = 5) + val rs3 = UInt(width = 5) +} + +class RVCDecoder(x: UInt)(implicit p: Parameters) { + def inst(bits: UInt, rd: UInt = x(11,7), rs1: UInt = x(19,15), rs2: UInt = x(24,20), rs3: UInt = x(31,27)) = { + val res = Wire(new ExpandedInstruction) + res.bits := bits + res.rd := rd + res.rs1 := rs1 + res.rs2 := rs2 + res.rs3 := rs3 + res + } + + def rs1p = Cat(UInt(1,2), x(9,7)) + def rs2p = Cat(UInt(1,2), x(4,2)) + def rs2 = x(6,2) + def rd = x(11,7) + def addi4spnImm = Cat(x(10,7), x(12,11), x(5), x(6), UInt(0,2)) + def lwImm = Cat(x(5), x(12,10), x(6), UInt(0,2)) + def ldImm = Cat(x(6,5), x(12,10), UInt(0,3)) + def lwspImm = Cat(x(3,2), x(12), x(6,4), UInt(0,2)) + def ldspImm = Cat(x(4,2), x(12), x(6,5), UInt(0,3)) + def swspImm = Cat(x(8,7), x(12,9), UInt(0,2)) + def sdspImm = Cat(x(9,7), x(12,10), UInt(0,3)) + def luiImm = Cat(Fill(15, x(12)), x(6,2), UInt(0,12)) + def addi16spImm = Cat(Fill(3, x(12)), x(4,3), x(5), x(2), x(6), UInt(0,4)) + def addiImm = Cat(Fill(7, x(12)), x(6,2)) + def jImm = Cat(Fill(10, x(12)), x(8), x(10,9), x(6), x(7), x(2), x(11), x(5,3), UInt(0,1)) + def bImm = Cat(Fill(5, x(12)), x(6,5), x(2), x(11,10), x(4,3), UInt(0,1)) + def shamt = Cat(x(12), x(6,2)) + def x0 = UInt(0,5) + def ra = UInt(1,5) + def sp = UInt(2,5) + + def q0 = { + def addi4spn = { + val opc = Mux(x(12,5).orR, UInt(0x13,7), UInt(0x1F,7)) + inst(Cat(addi4spnImm, sp, UInt(0,3), rs2p, opc), rs2p, sp, rs2p) + } + def ld = inst(Cat(ldImm, rs1p, UInt(3,3), rs2p, UInt(0x03,7)), rs2p, rs1p, rs2p) + def lw = inst(Cat(lwImm, rs1p, UInt(2,3), rs2p, UInt(0x03,7)), rs2p, rs1p, rs2p) + def fld = inst(Cat(ldImm, rs1p, UInt(3,3), rs2p, UInt(0x07,7)), rs2p, rs1p, rs2p) + def flw = { + if (p(XLen) == 32) inst(Cat(lwImm, rs1p, UInt(2,3), rs2p, UInt(0x07,7)), rs2p, rs1p, rs2p) + else ld + } + def unimp = inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x2F,7)), rs2p, rs1p, rs2p) + def sd = inst(Cat(ldImm >> 5, rs2p, rs1p, UInt(3,3), ldImm(4,0), UInt(0x23,7)), rs2p, rs1p, rs2p) + def sw = inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x23,7)), rs2p, rs1p, rs2p) + def fsd = inst(Cat(ldImm >> 5, rs2p, rs1p, UInt(3,3), ldImm(4,0), UInt(0x27,7)), rs2p, rs1p, rs2p) + def fsw = { + if (p(XLen) == 32) inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x27,7)), rs2p, rs1p, rs2p) + else sd + } + Seq(addi4spn, fld, lw, flw, unimp, fsd, sw, fsw) + } + + def q1 = { + def addi = inst(Cat(addiImm, rd, UInt(0,3), rd, UInt(0x13,7)), rd, rd, rs2p) + def addiw = { + val opc = Mux(rd.orR, UInt(0x1B,7), UInt(0x1F,7)) + inst(Cat(addiImm, rd, UInt(0,3), rd, opc), rd, rd, rs2p) + } + def jal = { + if (p(XLen) == 32) inst(Cat(jImm(20), jImm(10,1), jImm(11), jImm(19,12), ra, UInt(0x6F,7)), ra, rd, rs2p) + else addiw + } + def li = inst(Cat(addiImm, x0, UInt(0,3), rd, UInt(0x13,7)), rd, x0, rs2p) + def addi16sp = { + val opc = Mux(addiImm.orR, UInt(0x13,7), UInt(0x1F,7)) + inst(Cat(addi16spImm, rd, UInt(0,3), rd, opc), rd, rd, rs2p) + } + def lui = { + val opc = Mux(addiImm.orR, UInt(0x37,7), UInt(0x3F,7)) + val me = inst(Cat(luiImm(31,12), rd, opc), rd, rd, rs2p) + Mux(rd === x0 || rd === sp, addi16sp, me) + } + def j = inst(Cat(jImm(20), jImm(10,1), jImm(11), jImm(19,12), x0, UInt(0x6F,7)), x0, rs1p, rs2p) + def beqz = inst(Cat(bImm(12), bImm(10,5), x0, rs1p, UInt(0,3), bImm(4,1), bImm(11), UInt(0x63,7)), rs1p, rs1p, x0) + def bnez = inst(Cat(bImm(12), bImm(10,5), x0, rs1p, UInt(1,3), bImm(4,1), bImm(11), UInt(0x63,7)), x0, rs1p, x0) + def arith = { + def srli = Cat(shamt, rs1p, UInt(5,3), rs1p, UInt(0x13,7)) + def srai = srli | UInt(1 << 30) + def andi = Cat(addiImm, rs1p, UInt(7,3), rs1p, UInt(0x13,7)) + def rtype = { + val funct = Seq(0.U, 4.U, 6.U, 7.U, 0.U, 0.U, 2.U, 3.U)(Cat(x(12), x(6,5))) + val sub = Mux(x(6,5) === UInt(0), UInt(1 << 30), UInt(0)) + val opc = Mux(x(12), UInt(0x3B,7), UInt(0x33,7)) + Cat(rs2p, rs1p, funct, rs1p, opc) | sub + } + inst(Seq(srli, srai, andi, rtype)(x(11,10)), rs1p, rs1p, rs2p) + } + Seq(addi, jal, li, lui, arith, j, beqz, bnez) + } + + def q2 = { + def slli = inst(Cat(shamt, rd, UInt(1,3), rd, UInt(0x13,7)), rd, rd, rs2) + def ldsp = inst(Cat(ldspImm, sp, UInt(3,3), rd, UInt(0x03,7)), rd, sp, rs2) + def lwsp = inst(Cat(lwspImm, sp, UInt(2,3), rd, UInt(0x03,7)), rd, sp, rs2) + def fldsp = inst(Cat(ldspImm, sp, UInt(3,3), rd, UInt(0x07,7)), rd, sp, rs2) + def flwsp = { + if (p(XLen) == 32) inst(Cat(lwspImm, sp, UInt(2,3), rd, UInt(0x07,7)), rd, sp, rs2) + else ldsp + } + def sdsp = inst(Cat(sdspImm >> 5, rs2, sp, UInt(3,3), sdspImm(4,0), UInt(0x23,7)), rd, sp, rs2) + def swsp = inst(Cat(swspImm >> 5, rs2, sp, UInt(2,3), swspImm(4,0), UInt(0x23,7)), rd, sp, rs2) + def fsdsp = inst(Cat(sdspImm >> 5, rs2, sp, UInt(3,3), sdspImm(4,0), UInt(0x27,7)), rd, sp, rs2) + def fswsp = { + if (p(XLen) == 32) inst(Cat(swspImm >> 5, rs2, sp, UInt(2,3), swspImm(4,0), UInt(0x27,7)), rd, sp, rs2) + else sdsp + } + def jalr = { + val mv = inst(Cat(rs2, x0, UInt(0,3), rd, UInt(0x33,7)), rd, x0, rs2) + val add = inst(Cat(rs2, rd, UInt(0,3), rd, UInt(0x33,7)), rd, rd, rs2) + val jr = Cat(rs2, rd, UInt(0,3), x0, UInt(0x67,7)) + val reserved = Cat(jr >> 7, UInt(0x1F,7)) + val jr_reserved = inst(Mux(rd.orR, jr, reserved), x0, rd, rs2) + val jr_mv = Mux(rs2.orR, mv, jr_reserved) + val jalr = Cat(rs2, rd, UInt(0,3), ra, UInt(0x67,7)) + val ebreak = Cat(jr >> 7, UInt(0x73,7)) | UInt(1 << 20) + val jalr_ebreak = inst(Mux(rd.orR, jalr, ebreak), ra, rd, rs2) + val jalr_add = Mux(rs2.orR, add, jalr_ebreak) + Mux(x(12), jalr_add, jr_mv) + } + Seq(slli, fldsp, lwsp, flwsp, jalr, fsdsp, swsp, fswsp) + } + + def q3 = Seq.fill(8)(passthrough) + + def passthrough = inst(x) + + def decode = { + val s = q0 ++ q1 ++ q2 ++ q3 + s(Cat(x(1,0), x(15,13))) + } +} + +class RVCExpander(implicit p: Parameters) extends Module { + val io = new Bundle { + val in = UInt(INPUT, 32) + val out = new ExpandedInstruction + val rvc = Bool(OUTPUT) + } + + if (p(UseCompressed)) { + io.rvc := io.in(1,0) =/= UInt(3) + io.out := new RVCDecoder(io.in).decode + } else { + io.rvc := Bool(false) + io.out := new RVCDecoder(io.in).passthrough + } +} diff --git a/src/main/scala/Configs.scala b/src/main/scala/Configs.scala index 6ef82984..197acde9 100644 --- a/src/main/scala/Configs.scala +++ b/src/main/scala/Configs.scala @@ -186,7 +186,7 @@ class BaseConfig extends Config ( case RoccNPTWPorts => site(BuildRoCC).map(_.nPTWPorts).foldLeft(0)(_ + _) case RoccNCSRs => site(BuildRoCC).map(_.csrs.size).foldLeft(0)(_ + _) //Rocket Core Constants - case FetchWidth => 1 + case FetchWidth => if (site(UseCompressed)) 2 else 1 case RetireWidth => 1 case UseVM => true case UseUser => true @@ -216,6 +216,11 @@ class BaseConfig extends Config ( TestGeneration.addSuites(env.map(if (site(XLen) == 64) rv64ua else rv32ua)) true } + case UseCompressed => { + val env = if(site(UseVM)) List("p","v") else List("p") + TestGeneration.addSuites(env.map(if (site(XLen) == 64) rv64uc else rv32uc)) + true + } case NExtInterrupts => 2 case AsyncMMIOChannels => false case ExtMMIOPorts => AddrMap() @@ -234,7 +239,7 @@ class BaseConfig extends Config ( case FDivSqrt => true case SFMALatency => 2 case DFMALatency => 3 - case CoreInstBits => 32 + case CoreInstBits => if (site(UseCompressed)) 16 else 32 case CoreDataBits => site(XLen) case NCustomMRWCSRs => 0 case ResetVector => BigInt(0x1000) diff --git a/src/main/scala/Testing.scala b/src/main/scala/Testing.scala index da01df99..975c415e 100644 --- a/src/main/scala/Testing.scala +++ b/src/main/scala/Testing.scala @@ -99,6 +99,9 @@ object DefaultTestSuites { "slt", "slti", "sra", "srai", "srl", "srli", "sub", "xor", "xori") val rv32ui = new AssemblyTestSuite("rv32ui", rv32uiNames)(_) + val rv32ucNames = LinkedHashSet("rvc") + val rv32uc = new AssemblyTestSuite("rv32uc", rv32ucNames)(_) + val rv32umNames = LinkedHashSet("mul", "mulh", "mulhsu", "mulhu", "div", "divu", "rem", "remu") val rv32um = new AssemblyTestSuite("rv32um", rv32umNames)(_) @@ -124,6 +127,9 @@ object DefaultTestSuites { val rv64uaNames = rv32uaNames.map(_.replaceAll("_w","_d")) val rv64ua = new AssemblyTestSuite("rv64ua", rv32uaNames ++ rv64uaNames)(_) + val rv64ucNames = rv32ucNames + val rv64uc = new AssemblyTestSuite("rv64uc", rv64ucNames)(_) + val rv64ufNames = LinkedHashSet("ldst", "move", "fsgnj", "fcmp", "fcvt", "fcvt_w", "fclass", "fadd", "fdiv", "fmin", "fmadd") val rv64uf = new AssemblyTestSuite("rv64uf", rv64ufNames)(_) val rv64ufNoDiv = new AssemblyTestSuite("rv64uf", rv64ufNames - "fdiv")(_) @@ -148,10 +154,10 @@ object DefaultTestSuites { val rv64i = List(rv64ui, rv64si, rv64mi) val rv64pi = List(rv64ui, rv64mi) - val benchmarks = new BenchmarkTestSuite("basic", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet( + val benchmarks = new BenchmarkTestSuite("rvi", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet( "median", "multiply", "qsort", "towers", "vvadd", "dhrystone", "mt-matmul")) - val rv32udBenchmarks = new BenchmarkTestSuite("basic", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet( + val rv32udBenchmarks = new BenchmarkTestSuite("rvd", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet( "mm", "spmv", "mt-vvadd")) val emptyBmarks = new BenchmarkTestSuite("empty",