
[rocket] Implement RVC

Andrew Waterman 2016-07-29 16:36:07 -07:00
parent c465120610
commit 058396aefe
10 changed files with 434 additions and 89 deletions

@@ -1 +1 @@
-Subproject commit 7219be435a89277603e566e806ae8540c7f9a917
+Subproject commit 65da94f84a2ba5a61a8bcf3ebdd8ca57f6d899ca


@@ -233,7 +233,7 @@ class BTB(implicit p: Parameters) extends BtbModule {
     io.resp.bits.taken := true
     io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts) << log2Up(coreInstBytes))
     io.resp.bits.entry := OHToUInt(hits)
-    io.resp.bits.bridx := Mux1H(hitsVec, brIdx)
+    io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(hitsVec, brIdx) else UInt(0))
     io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))
     if (nBHT > 0) {


@@ -33,7 +33,7 @@ trait ScalarOpConstants {
   val A2_X = BitPat("b??")
   val A2_ZERO = UInt(0, 2)
-  val A2_FOUR = UInt(1, 2)
+  val A2_SIZE = UInt(1, 2)
   val A2_RS2 = UInt(2, 2)
   val A2_IMM = UInt(3, 2)


@@ -11,8 +11,9 @@ class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) {
 }
 class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
+  val btb = Valid(new BTBResp)
   val pc = UInt(width = vaddrBitsExtended)  // ID stage PC
-  val data = Vec(fetchWidth, Bits(width = coreInstBits))
+  val data = UInt(width = fetchWidth * coreInstBits)
   val mask = Bits(width = fetchWidth)
   val xcpt_if = Bool()
   val replay = Bool()
@@ -21,7 +22,6 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
 class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
   val req = Valid(new FrontendReq)
   val resp = Decoupled(new FrontendResp).flip
-  val btb_resp = Valid(new BTBResp).flip
   val btb_update = Valid(new BTBUpdate)
   val bht_update = Valid(new BHTUpdate)
   val ras_update = Valid(new RASUpdate)
@@ -50,28 +50,37 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
   val s2_btb_resp_bits = Reg(new BTBResp)
   val s2_xcpt_if = Reg(init=Bool(false))
   val s2_speculative = Reg(init=Bool(false))
+  val s2_cacheable = Reg(init=Bool(false))
   val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
+  val ntpc_same_block = (ntpc & rowBytes) === (s1_pc & rowBytes)
   val predicted_npc = Wire(init = ntpc)
+  val predicted_taken = Wire(init = Bool(false))
   val icmiss = s2_valid && !icache.io.resp.valid
   val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt
-  val s0_same_block = Wire(init = !icmiss && !io.cpu.req.valid && ((ntpc & rowBytes) === (s1_pc & rowBytes)))
+  val s0_same_block = !predicted_taken && !icmiss && !io.cpu.req.valid && ntpc_same_block
   val stall = io.cpu.resp.valid && !io.cpu.resp.ready
   when (!stall) {
     s1_same_block := s0_same_block && !tlb.io.resp.miss
-    s1_pc_ := npc
-    s1_speculative := Mux(icmiss, s2_speculative, true)
+    s1_pc_ := io.cpu.npc
+    // consider RVC fetches across blocks to be non-speculative if the first
+    // part was non-speculative
+    val s0_speculative =
+      if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken
+      else Bool(true)
+    s1_speculative := Mux(icmiss, s2_speculative, s0_speculative)
     s2_valid := !icmiss
     when (!icmiss) {
       s2_pc := s1_pc
-      s2_speculative := s1_speculative && !tlb.io.resp.cacheable
+      s2_speculative := s1_speculative
+      s2_cacheable := tlb.io.resp.cacheable
       s2_xcpt_if := tlb.io.resp.xcpt_if
     }
   }
   when (io.cpu.req.valid) {
     s1_same_block := Bool(false)
-    s1_pc_ := io.cpu.req.bits.pc
+    s1_pc_ := io.cpu.npc
     s1_speculative := io.cpu.req.bits.speculative
     s2_valid := Bool(false)
   }
@@ -79,7 +88,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
   if (p(BtbKey).nEntries > 0) {
     val btb = Module(new BTB)
     btb.io.req.valid := false
-    btb.io.req.bits.addr := s1_pc
+    btb.io.req.bits.addr := s1_pc_
     btb.io.btb_update := io.cpu.btb_update
     btb.io.bht_update := io.cpu.bht_update
     btb.io.ras_update := io.cpu.ras_update
@@ -88,9 +97,9 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
       s2_btb_resp_valid := btb.io.resp.valid
       s2_btb_resp_bits := btb.io.resp.bits
     }
-    when (btb.io.resp.bits.taken) {
+    when (btb.io.resp.valid && btb.io.resp.bits.taken) {
       predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended)
-      s0_same_block := Bool(false)
+      predicted_taken := Bool(true)
     }
   }
@@ -107,24 +116,18 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa
   icache.io.invalidate := io.cpu.flush_icache
   icache.io.s1_ppn := tlb.io.resp.ppn
   icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb
-  icache.io.s2_kill := s2_speculative
+  icache.io.s2_kill := s2_speculative && !s2_cacheable
   icache.io.resp.ready := !stall && !s1_same_block
-  io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || s2_speculative || s2_xcpt_if)
+  io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill || s2_xcpt_if)
   io.cpu.resp.bits.pc := s2_pc
   io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
-  require(fetchWidth * coreInstBytes <= rowBytes)
-  val fetch_data = icache.io.resp.bits.datablock >> (s2_pc.extract(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits))
-  for (i <- 0 until fetchWidth) {
-    io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits)
-  }
+  require(fetchWidth * coreInstBytes <= rowBytes && isPow2(fetchWidth))
+  io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc.extract(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits))
   io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Up(fetchWidth)+log2Up(coreInstBytes)-1, log2Up(coreInstBytes))
   io.cpu.resp.bits.xcpt_if := s2_xcpt_if
-  io.cpu.resp.bits.replay := s2_speculative && !icache.io.resp.valid && !s2_xcpt_if
-  io.cpu.btb_resp.valid := s2_btb_resp_valid
-  io.cpu.btb_resp.bits := s2_btb_resp_bits
+  io.cpu.resp.bits.replay := icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt_if
+  io.cpu.resp.bits.btb.valid := s2_btb_resp_valid
+  io.cpu.resp.bits.btb.bits := s2_btb_resp_bits
 }
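Note: with this change the frontend hands the whole fetch packet to the core as one packed word. FrontendResp.data is now a single UInt of fetchWidth * coreInstBits bits plus a per-parcel mask, and the BTB response rides inside the response bundle instead of the separate btb_resp port. A minimal sketch (not part of the commit) of how a consumer such as the IBuf can slice the i-th parcel back out, using the same indexing the removed per-slot loop used; it assumes coreInstBits from HasCoreParameters is in scope:

  def parcel(resp: FrontendResp, i: Int): UInt =
    resp.data(i*coreInstBits + coreInstBits - 1, i*coreInstBits)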


@@ -0,0 +1,141 @@
// See LICENSE for license details.

package rocket

import Chisel._
import Util._
import cde.{Parameters, Field}
import junctions._

class Instruction(implicit val p: Parameters) extends ParameterizedBundle with HasCoreParameters {
  val pf0 = Bool() // page fault on first half of instruction
  val pf1 = Bool() // page fault on second half of instruction
  val replay = Bool()
  val btb_hit = Bool()
  val rvc = Bool()
  val inst = new ExpandedInstruction

  require(coreInstBits == (if (usingCompressed) 16 else 32))
}

class IBuf(implicit p: Parameters) extends CoreModule {
  val io = new Bundle {
    val imem = Decoupled(new FrontendResp).flip
    val kill = Bool(INPUT)
    val pc = UInt(width = vaddrBitsExtended)
    val btb_resp = new BTBResp().asOutput
    val inst = Vec(retireWidth, Decoupled(new Instruction))
  }

  // This module is meant to be more general, but it's not there yet
  require(fetchWidth == (if (usingCompressed) 2 else 1))

  val n = fetchWidth - 1
  val nBufValid = if (n == 0) UInt(0) else Reg(init=UInt(0, log2Ceil(fetchWidth)))
  val buf = Reg(io.imem.bits)
  val ibufBTBHit = Reg(Bool())
  val ibufBTBResp = Reg(new BTBResp)
  val pcWordMask = UInt(coreInstBytes*fetchWidth-1, vaddrBitsExtended)

  val pcWordBits = io.imem.bits.pc.extract(log2Ceil(fetchWidth*coreInstBytes)-1, log2Ceil(coreInstBytes))
  val nReady = Wire(init = UInt(0, log2Ceil(fetchWidth+1)))
  val nIC = Mux(io.imem.bits.btb.valid && io.imem.bits.btb.bits.taken, io.imem.bits.btb.bits.bridx +& 1, UInt(fetchWidth)) - pcWordBits
  val nICReady = nReady - nBufValid
  val nValid = Mux(io.imem.valid, nIC, UInt(0)) + nBufValid
  io.imem.ready := nReady >= nBufValid && (nICReady >= nIC || n >= nIC - nICReady)

  if (n > 0) {
    nBufValid := Mux(nReady >= nBufValid, UInt(0), nBufValid - nReady)
    if (n > 1) when (nReady > 0 && nReady < nBufValid) {
      val shiftedBuf = shiftInsnRight(buf.data(n*coreInstBits-1, coreInstBits), (nReady-1)(log2Ceil(n-1)-1,0))
      buf.data := Cat(buf.data(n*coreInstBits-1, (n-1)*coreInstBits), shiftedBuf((n-1)*coreInstBits-1, 0))
      buf.pc := buf.pc & ~pcWordMask | (buf.pc + (nReady << log2Ceil(coreInstBytes))) & pcWordMask
      ibufBTBResp.bridx := ibufBTBResp.bridx - nReady
    }
    when (io.imem.valid && nReady >= nBufValid && nICReady < nIC && n >= nIC - nICReady) {
      val shamt = pcWordBits + nICReady
      nBufValid := nIC - nICReady
      buf := io.imem.bits
      buf.data := shiftInsnRight(io.imem.bits.data, shamt)(n*coreInstBits-1,0)
      buf.pc := io.imem.bits.pc & ~pcWordMask | (io.imem.bits.pc + (nICReady << log2Ceil(coreInstBytes))) & pcWordMask
      ibufBTBHit := io.imem.bits.btb.valid
      when (io.imem.bits.btb.valid) {
        ibufBTBResp := io.imem.bits.btb.bits
        ibufBTBResp.bridx := io.imem.bits.btb.bits.bridx + nICReady
      }
    }
    when (io.kill) {
      nBufValid := 0
    }
  }

  val icShiftAmt = (fetchWidth + nBufValid - pcWordBits)(log2Ceil(fetchWidth), 0)
  val icData = shiftInsnLeft(Cat(io.imem.bits.data, Fill(fetchWidth, io.imem.bits.data(coreInstBits-1, 0))), icShiftAmt)
    .extract(3*fetchWidth*coreInstBits-1, 2*fetchWidth*coreInstBits)
  val icMask = (~UInt(0, fetchWidth*coreInstBits) << (nBufValid << log2Ceil(coreInstBits)))(fetchWidth*coreInstBits-1,0)
  val inst = icData & icMask | buf.data & ~icMask

  val valid = (UIntToOH(nValid) - 1)(fetchWidth-1, 0)
  val bufMask = UIntToOH(nBufValid) - 1
  val xcpt_if = valid & (Mux(buf.xcpt_if, bufMask, UInt(0)) | Mux(io.imem.bits.xcpt_if, ~bufMask, UInt(0)))
  val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0)))
  val boundaries = findInsnBoundaries(inst)
  val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0))
  val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0))
  val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask

  io.btb_resp := Mux((ibufBTBHitMask & bufMask).orR, ibufBTBResp, io.imem.bits.btb.bits)
  io.pc := Mux(nBufValid > 0, buf.pc, io.imem.bits.pc)
  expand(0, 0, inst)

  def expand(i: Int, j: UInt, curInst: UInt): Unit = if (i < retireWidth) {
    val exp = Module(new RVCExpander)
    exp.io.in := curInst
    io.inst(i).bits.inst := exp.io.out

    if (usingCompressed) {
      val replay = ic_replay(j) || (!exp.io.rvc && (btbHitMask(j) || ic_replay(j+1)))
      io.inst(i).valid := valid(j) && (exp.io.rvc || valid(j+1) || xcpt_if(j+1) || replay)
      io.inst(i).bits.pf0 := xcpt_if(j)
      io.inst(i).bits.pf1 := !exp.io.rvc && xcpt_if(j+1)
      io.inst(i).bits.replay := replay
      io.inst(i).bits.btb_hit := btbHitMask(j) || (!exp.io.rvc && btbHitMask(j+1))
      io.inst(i).bits.rvc := exp.io.rvc

      when (io.inst(i).fire()) { nReady := Mux(exp.io.rvc, j+1, j+2) }

      expand(i+1, Mux(exp.io.rvc, j+1, j+2), Mux(exp.io.rvc, curInst >> 16, curInst >> 32))
    } else {
      when (io.inst(i).ready) { nReady := i+1 }
      io.inst(i).valid := valid(i)
      io.inst(i).bits.pf0 := xcpt_if(i)
      io.inst(i).bits.pf1 := false
      io.inst(i).bits.replay := ic_replay(i)
      io.inst(i).bits.rvc := false
      io.inst(i).bits.btb_hit := btbHitMask(i)

      expand(i+1, null, curInst >> 32)
    }
  }

  def shiftInsnLeft(in: UInt, dist: UInt) = {
    val r = in.getWidth/coreInstBits
    require(in.getWidth % coreInstBits == 0)
    val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r-1)*coreInstBits), in)
    data << (dist << log2Ceil(coreInstBits))
  }

  def shiftInsnRight(in: UInt, dist: UInt) = {
    val r = in.getWidth/coreInstBits
    require(in.getWidth % coreInstBits == 0)
    val data = Cat(Fill((1 << (log2Ceil(r) + 1)) - r, in >> (r-1)*coreInstBits), in)
    data >> (dist << log2Ceil(coreInstBits))
  }

  def findInsnBoundaries(insns: UInt): Seq[Bool] = {
    def isRVC(insn: UInt) = if (usingCompressed) insn(1,0) =/= 3 else Bool(false)
    val end = collection.mutable.ArrayBuffer(isRVC(insns))
    for (i <- 1 until insns.getWidth/16)
      end += !end.head || isRVC(insns(i*16+1,i*16))
    end
  }
}
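Note: the buffer keeps up to fetchWidth - 1 leftover halfwords so a 32-bit instruction that straddles a fetch-packet boundary can be stitched together, and expand() walks the packet one parcel at a time, consuming one 16-bit slot for an RVC instruction and two for a full one. A software model of that bookkeeping (illustration only, not from the commit); the parcels argument and helper name are hypothetical:

  // Counts how many 16-bit slots of a fetch packet form complete instructions.
  def consumedSlots(parcels: Seq[Int]): Int = {
    def isRVC(halfword: Int) = (halfword & 3) != 3  // low two bits != 0b11 means compressed
    var j = 0
    while (j < parcels.length) {
      if (isRVC(parcels(j))) j += 1                 // 16-bit instruction: one slot
      else if (j + 1 < parcels.length) j += 2       // 32-bit instruction: two slots
      else return j                                 // second half not fetched yet: stop here
    }
    j
  }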


@@ -73,7 +73,7 @@ class IDecode(implicit val p: Parameters) extends DecodeConstants
     BGE->  List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X,  FN_SGE,  N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
     BGEU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X,  FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N),
-    JAL->  List(Y,N,N,N,Y,N,N,N,A2_FOUR,A1_PC,  IMM_UJ,DW_XPR,FN_ADD,  N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
+    JAL->  List(Y,N,N,N,Y,N,N,N,A2_SIZE,A1_PC,  IMM_UJ,DW_XPR,FN_ADD,  N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
     JALR-> List(Y,N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD,  N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),
     AUIPC->List(Y,N,N,N,N,N,N,N,A2_IMM, A1_PC,  IMM_U, DW_XPR,FN_ADD,  N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N),


@@ -19,6 +19,7 @@ case object UseVM extends Field[Boolean]
 case object UseUser extends Field[Boolean]
 case object UseDebug extends Field[Boolean]
 case object UseAtomics extends Field[Boolean]
+case object UseCompressed extends Field[Boolean]
 case object UsePerfCounters extends Field[Boolean]
 case object FastLoadWord extends Field[Boolean]
 case object FastLoadByte extends Field[Boolean]
@@ -42,6 +43,7 @@ trait HasCoreParameters extends HasAddrMapParameters {
   val usingDebug = p(UseDebug)
   val usingFPU = p(UseFPU)
   val usingAtomics = p(UseAtomics)
+  val usingCompressed = p(UseCompressed)
   val usingFDivSqrt = p(FDivSqrt)
   val usingRoCC = !p(BuildRoCC).isEmpty
   val mulUnroll = p(MulUnroll)
@@ -65,6 +67,10 @@ trait HasCoreParameters extends HasAddrMapParameters {
   val nRoccCsrs = p(RoccNCSRs)
   val nCores = p(NTiles)
+  // fetchWidth doubled, but coreInstBytes halved, for RVC
+  require(fetchWidth == retireWidth * (4 / coreInstBytes))
+  require(retireWidth == 1)
   // Print out log of committed instructions and their writeback values.
   // Requires post-processing due to out-of-order writebacks.
   val enableCommitLog = false
@@ -75,7 +81,7 @@ trait HasCoreParameters extends HasAddrMapParameters {
     case 64 => 50
   }
-  require(paddrBits < maxPAddrBits)
+  require(paddrBits <= maxPAddrBits)
   require(!fastLoadByte || fastLoadWord)
 }
@@ -153,8 +159,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   val ex_reg_xcpt_interrupt = Reg(Bool())
   val ex_reg_valid = Reg(Bool())
+  val ex_reg_rvc = Reg(Bool())
   val ex_reg_btb_hit = Reg(Bool())
-  val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits)
+  val ex_reg_btb_resp = Reg(new BTBResp)
   val ex_reg_xcpt = Reg(Bool())
   val ex_reg_flush_pipe = Reg(Bool())
   val ex_reg_load_use = Reg(Bool())
@@ -165,8 +172,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   val mem_reg_xcpt_interrupt = Reg(Bool())
   val mem_reg_valid = Reg(Bool())
+  val mem_reg_rvc = Reg(Bool())
   val mem_reg_btb_hit = Reg(Bool())
-  val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits)
+  val mem_reg_btb_resp = Reg(new BTBResp)
   val mem_reg_xcpt = Reg(Bool())
   val mem_reg_replay = Reg(Bool())
   val mem_reg_flush_pipe = Reg(Bool())
@@ -182,7 +190,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   val wb_reg_valid = Reg(Bool())
   val wb_reg_xcpt = Reg(Bool())
-  val wb_reg_mem_xcpt = Reg(Bool())
   val wb_reg_replay = Reg(Bool())
   val wb_reg_cause = Reg(UInt())
   val wb_reg_pc = Reg(UInt())
@@ -195,13 +202,17 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   val take_pc = take_pc_mem_wb
   // decode stage
-  val id_pc = io.imem.resp.bits.pc
-  val id_inst = io.imem.resp.bits.data(0).toBits; require(fetchWidth == 1)
-  val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table)
-  val id_raddr3 = id_inst(31,27)
-  val id_raddr2 = id_inst(24,20)
-  val id_raddr1 = id_inst(19,15)
-  val id_waddr = id_inst(11,7)
+  val ibuf = Module(new IBuf)
+  val id_expanded_inst = ibuf.io.inst.map(_.bits.inst)
+  val id_inst = id_expanded_inst.map(_.bits)
+  ibuf.io.imem <> io.imem.resp
+  ibuf.io.kill := take_pc
+  val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst(0), decode_table)
+  val id_raddr3 = id_expanded_inst(0).rs3
+  val id_raddr2 = id_expanded_inst(0).rs2
+  val id_raddr1 = id_expanded_inst(0).rs1
+  val id_waddr = id_expanded_inst(0).rd
   val id_load_use = Wire(Bool())
   val id_reg_fence = Reg(init=Bool(false))
   val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2)
@@ -215,7 +226,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   val id_system_insn = id_ctrl.csr === CSR.I
   val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0)
   val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr)
-  val id_csr_addr = id_inst(31,20)
+  val id_csr_addr = id_inst(0)(31,20)
   // this is overly conservative
   val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil
   val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*)
@@ -225,8 +236,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
     id_ctrl.fp && !csr.io.status.fs.orR ||
     id_ctrl.rocc && !csr.io.status.xs.orR
   // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE)
-  val id_amo_aq = id_inst(26)
-  val id_amo_rl = id_inst(25)
+  val id_amo_aq = id_inst(0)(26)
+  val id_amo_rl = id_inst(0)(25)
   val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
   val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
   val id_rocc_busy = Bool(usingRoCC) &&
@@ -239,13 +250,14 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   val bpu = Module(new BreakpointUnit)
   bpu.io.status := csr.io.status
   bpu.io.bp := csr.io.bp
-  bpu.io.pc := id_pc
+  bpu.io.pc := ibuf.io.pc
   bpu.io.ea := mem_reg_wdata
+  val id_xcpt_if = ibuf.io.inst(0).bits.pf0 || ibuf.io.inst(0).bits.pf1
   val (id_xcpt, id_cause) = checkExceptions(List(
     (csr.io.interrupt, csr.io.interrupt_cause),
     (bpu.io.xcpt_if, UInt(Causes.breakpoint)),
-    (io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)),
+    (id_xcpt_if, UInt(Causes.fault_fetch)),
     (id_illegal_insn, UInt(Causes.illegal_instruction))))
   val dcache_bypass_data =
@@ -278,7 +290,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   val ex_op2 = MuxLookup(ex_ctrl.sel_alu2, SInt(0), Seq(
     A2_RS2 -> ex_rs(1).toSInt,
     A2_IMM -> ex_imm,
-    A2_FOUR -> SInt(4)))
+    A2_SIZE -> Mux(ex_reg_rvc, SInt(2), SInt(4))))
   val alu = Module(new ALU)
   alu.io.dw := ex_ctrl.alu_dw
@@ -299,16 +311,26 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   div.io.req.bits.tag := ex_waddr
   ex_reg_valid := !ctrl_killd
-  ex_reg_replay := !take_pc && io.imem.resp.valid && io.imem.resp.bits.replay
+  ex_reg_replay := !take_pc && ibuf.io.inst(0).valid && ibuf.io.inst(0).bits.replay
   ex_reg_xcpt := !ctrl_killd && id_xcpt
-  ex_reg_xcpt_interrupt := !take_pc && io.imem.resp.valid && csr.io.interrupt
+  ex_reg_xcpt_interrupt := !take_pc && ibuf.io.inst(0).valid && csr.io.interrupt
   when (id_xcpt) { ex_reg_cause := id_cause }
+  ex_reg_btb_hit := ibuf.io.inst(0).bits.btb_hit
+  when (ibuf.io.inst(0).bits.btb_hit) { ex_reg_btb_resp := ibuf.io.btb_resp }
   when (!ctrl_killd) {
     ex_ctrl := id_ctrl
+    ex_reg_rvc := ibuf.io.inst(0).bits.rvc
     ex_ctrl.csr := id_csr
-    ex_reg_btb_hit := io.imem.btb_resp.valid
-    when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits }
+    when (id_xcpt) { // pass PC down ALU writeback pipeline for badaddr
+      ex_ctrl.alu_fn := ALU.FN_ADD
+      ex_ctrl.sel_alu1 := A1_PC
+      ex_ctrl.sel_alu2 := A2_ZERO
+      when (!bpu.io.xcpt_if && !ibuf.io.inst(0).bits.pf0 && ibuf.io.inst(0).bits.pf1) { // PC+2
+        ex_ctrl.sel_alu2 := A2_SIZE
+        ex_reg_rvc := true
+      }
+    }
     ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep
     ex_reg_load_use := id_load_use
@@ -328,9 +350,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
       }
     }
   }
-  when (!ctrl_killd || csr.io.interrupt || io.imem.resp.bits.replay) {
-    ex_reg_inst := id_inst
-    ex_reg_pc := id_pc
+  when (!ctrl_killd || csr.io.interrupt || ibuf.io.inst(0).bits.replay) {
+    ex_reg_inst := id_inst(0)
+    ex_reg_pc := ibuf.io.pc
   }
   // replay inst in ex stage?
@@ -352,18 +374,18 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   val mem_br_taken = mem_reg_wdata(0)
   val mem_br_target = mem_reg_pc.toSInt +
     Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst),
-    Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), SInt(4)))
-  val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt
+    Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst),
+    Mux(mem_reg_rvc, SInt(2), SInt(4))))
   val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).toSInt, mem_br_target) & SInt(-2)).toUInt
-  val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(io.imem.resp.valid, mem_npc =/= id_pc, Bool(true)))
-  val mem_npc_misaligned = mem_npc(1)
+  val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(ibuf.io.inst(0).valid, mem_npc =/= ibuf.io.pc, Bool(true)))
+  val mem_npc_misaligned = if (usingCompressed) Bool(false) else mem_npc(1)
+  val mem_int_wdata = Mux(!mem_reg_xcpt && (mem_ctrl.jalr ^ mem_npc_misaligned), mem_br_target, mem_reg_wdata.toSInt).toUInt
   val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal
   val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal
   val mem_misprediction =
     if (p(BtbKey).nEntries == 0) mem_cfi_taken
     else mem_wrong_npc
-  val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
-  take_pc_mem := want_take_pc_mem && !mem_npc_misaligned
+  take_pc_mem := mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe)
   mem_reg_valid := !ctrl_killx
   mem_reg_replay := !take_pc_mem_wb && replay_ex
@@ -373,6 +395,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   when (ex_pc_valid) {
     mem_ctrl := ex_ctrl
+    mem_reg_rvc := ex_reg_rvc
     mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd)
     mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd)
     mem_reg_btb_hit := ex_reg_btb_hit
@@ -391,7 +414,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   val (mem_new_xcpt, mem_new_cause) = checkExceptions(List(
     (mem_reg_load && bpu.io.xcpt_ld, UInt(Causes.breakpoint)),
     (mem_reg_store && bpu.io.xcpt_st, UInt(Causes.breakpoint)),
-    (want_take_pc_mem && mem_npc_misaligned, UInt(Causes.misaligned_fetch)),
+    (mem_npc_misaligned, UInt(Causes.misaligned_fetch)),
     (mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)),
     (mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)),
     (mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)),
@@ -412,11 +435,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   wb_reg_valid := !ctrl_killm
   wb_reg_replay := replay_mem && !take_pc_wb
   wb_reg_xcpt := mem_xcpt && !take_pc_wb
-  wb_reg_mem_xcpt := mem_reg_valid && mem_new_xcpt && !(mem_reg_xcpt_interrupt || mem_reg_xcpt)
   when (mem_xcpt) { wb_reg_cause := mem_cause }
   when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) {
     wb_ctrl := mem_ctrl
-    wb_reg_wdata := Mux(mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
+    wb_reg_wdata := Mux(!mem_reg_xcpt && mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
     when (mem_ctrl.rocc) {
       wb_reg_rs2 := mem_reg_rs2
     }
@@ -434,7 +456,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   // writeback arbitration
   val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
   val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
-  val dmem_resp_waddr = io.dmem.resp.bits.tag >> 1
+  val dmem_resp_waddr = io.dmem.resp.bits.tag(5, 1)
   val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
   val dmem_resp_replay = dmem_resp_valid && io.dmem.resp.bits.replay
@@ -479,7 +501,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
   io.rocc.csr <> csr.io.rocc.csr
   csr.io.rocc.interrupt <> io.rocc.interrupt
   csr.io.pc := wb_reg_pc
-  csr.io.badaddr := Mux(wb_reg_mem_xcpt, encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata), wb_reg_pc)
+  csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata)
   io.ptw.ptbr := csr.io.ptbr
   io.ptw.invalidate := csr.io.fatc
   io.ptw.status := csr.io.status
@@ -495,7 +517,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
     (io.fpu.dec.ren3, id_raddr3),
     (io.fpu.dec.wen, id_waddr))
-  val sboard = new Scoreboard(32)
+  val sboard = new Scoreboard(32, true)
   sboard.clear(ll_wen, ll_waddr)
   val id_sboard_hazard = checkHazards(hazard_targets, sboard.read _)
   sboard.set(wb_set_sboard && wb_wen, wb_waddr)
@@ -542,7 +564,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
     id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy
     id_do_fence ||
     csr.io.csr_stall
-  ctrl_killd := !io.imem.resp.valid || io.imem.resp.bits.replay || take_pc || ctrl_stalld || csr.io.interrupt
+  ctrl_killd := !ibuf.io.inst(0).valid || ibuf.io.inst(0).bits.replay || take_pc || ctrl_stalld || csr.io.interrupt
   io.imem.req.valid := take_pc
   io.imem.req.bits.speculative := !take_pc_wb
@@ -552,33 +574,35 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
     mem_npc)).toUInt // mispredicted branch
   io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack
   io.imem.flush_tlb := csr.io.fatc
-  io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt || take_pc_mem
-  io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && mem_cfi_taken && !take_pc_wb
+  ibuf.io.inst(0).ready := !ctrl_stalld || csr.io.interrupt
+  io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && mem_wrong_npc)
+  io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi
   io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
   io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00??1")
-  io.imem.btb_update.bits.pc := mem_reg_pc
   io.imem.btb_update.bits.target := io.imem.req.bits.pc
-  io.imem.btb_update.bits.br_pc := mem_reg_pc
+  io.imem.btb_update.bits.br_pc := (if (usingCompressed) mem_reg_pc + Mux(mem_reg_rvc, UInt(0), UInt(2)) else mem_reg_pc)
+  io.imem.btb_update.bits.pc := ~(~io.imem.btb_update.bits.br_pc | (coreInstBytes*fetchWidth-1))
   io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit
   io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp
-  io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb
-  io.imem.bht_update.bits.pc := mem_reg_pc
+  io.imem.bht_update.valid := mem_reg_valid && !take_pc_wb && mem_ctrl.branch
+  io.imem.bht_update.bits.pc := io.imem.btb_update.bits.pc
   io.imem.bht_update.bits.taken := mem_br_taken
   io.imem.bht_update.bits.mispredict := mem_wrong_npc
   io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction
-  io.imem.ras_update.valid := mem_reg_valid && io.imem.btb_update.bits.isJump && !mem_npc_misaligned && !take_pc_wb
+  io.imem.ras_update.valid := mem_reg_valid && !take_pc_wb
   io.imem.ras_update.bits.returnAddr := mem_int_wdata
-  io.imem.ras_update.bits.isCall := mem_ctrl.wxd && mem_waddr(0)
+  io.imem.ras_update.bits.isCall := io.imem.btb_update.bits.isJump && mem_waddr(0)
   io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn
   io.imem.ras_update.bits.prediction := io.imem.btb_update.bits.prediction
   io.fpu.valid := !ctrl_killd && id_ctrl.fp
   io.fpu.killx := ctrl_killx
   io.fpu.killm := killm_common
-  io.fpu.inst := id_inst
+  io.fpu.inst := id_inst(0)
   io.fpu.fromint_data := ex_rs(0)
   io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
   io.fpu.dmem_resp_data := io.dmem.resp.bits.data_word_bypass
@@ -659,21 +683,22 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
     Cat(msb, ea(vaddrBits-1,0))
   }
-  class Scoreboard(n: Int)
+  class Scoreboard(n: Int, zero: Boolean = false)
   {
     def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr))
     def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr))
     def read(addr: UInt): Bool = r(addr)
    def readBypassed(addr: UInt): Bool = _next(addr)
-    private val r = Reg(init=Bits(0, n))
+    private val _r = Reg(init=Bits(0, n))
+    private val r = if (zero) (_r >> 1 << 1) else _r
    private var _next = r
    private var ens = Bool(false)
    private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0))
    private def update(en: Bool, update: UInt) = {
      _next = update
      ens = ens || en
-      when (ens) { r := _next }
+      when (ens) { _r := _next }
    }
  }
 }


@@ -0,0 +1,165 @@
package rocket

import Chisel._
import Chisel.ImplicitConversions._
import Util._
import cde.Parameters

class ExpandedInstruction extends Bundle {
  val bits = UInt(width = 32)
  val rd = UInt(width = 5)
  val rs1 = UInt(width = 5)
  val rs2 = UInt(width = 5)
  val rs3 = UInt(width = 5)
}

class RVCDecoder(x: UInt)(implicit p: Parameters) {
  def inst(bits: UInt, rd: UInt = x(11,7), rs1: UInt = x(19,15), rs2: UInt = x(24,20), rs3: UInt = x(31,27)) = {
    val res = Wire(new ExpandedInstruction)
    res.bits := bits
    res.rd := rd
    res.rs1 := rs1
    res.rs2 := rs2
    res.rs3 := rs3
    res
  }

  def rs1p = Cat(UInt(1,2), x(9,7))
  def rs2p = Cat(UInt(1,2), x(4,2))
  def rs2 = x(6,2)
  def rd = x(11,7)
  def addi4spnImm = Cat(x(10,7), x(12,11), x(5), x(6), UInt(0,2))
  def lwImm = Cat(x(5), x(12,10), x(6), UInt(0,2))
  def ldImm = Cat(x(6,5), x(12,10), UInt(0,3))
  def lwspImm = Cat(x(3,2), x(12), x(6,4), UInt(0,2))
  def ldspImm = Cat(x(4,2), x(12), x(6,5), UInt(0,3))
  def swspImm = Cat(x(8,7), x(12,9), UInt(0,2))
  def sdspImm = Cat(x(9,7), x(12,10), UInt(0,3))
  def luiImm = Cat(Fill(15, x(12)), x(6,2), UInt(0,12))
  def addi16spImm = Cat(Fill(3, x(12)), x(4,3), x(5), x(2), x(6), UInt(0,4))
  def addiImm = Cat(Fill(7, x(12)), x(6,2))
  def jImm = Cat(Fill(10, x(12)), x(8), x(10,9), x(6), x(7), x(2), x(11), x(5,3), UInt(0,1))
  def bImm = Cat(Fill(5, x(12)), x(6,5), x(2), x(11,10), x(4,3), UInt(0,1))
  def shamt = Cat(x(12), x(6,2))
  def x0 = UInt(0,5)
  def ra = UInt(1,5)
  def sp = UInt(2,5)

  def q0 = {
    def addi4spn = {
      val opc = Mux(x(12,5).orR, UInt(0x13,7), UInt(0x1F,7))
      inst(Cat(addi4spnImm, sp, UInt(0,3), rs2p, opc), rs2p, sp, rs2p)
    }
    def ld = inst(Cat(ldImm, rs1p, UInt(3,3), rs2p, UInt(0x03,7)), rs2p, rs1p, rs2p)
    def lw = inst(Cat(lwImm, rs1p, UInt(2,3), rs2p, UInt(0x03,7)), rs2p, rs1p, rs2p)
    def fld = inst(Cat(ldImm, rs1p, UInt(3,3), rs2p, UInt(0x07,7)), rs2p, rs1p, rs2p)
    def flw = {
      if (p(XLen) == 32) inst(Cat(lwImm, rs1p, UInt(2,3), rs2p, UInt(0x07,7)), rs2p, rs1p, rs2p)
      else ld
    }
    def unimp = inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x2F,7)), rs2p, rs1p, rs2p)
    def sd = inst(Cat(ldImm >> 5, rs2p, rs1p, UInt(3,3), ldImm(4,0), UInt(0x23,7)), rs2p, rs1p, rs2p)
    def sw = inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x23,7)), rs2p, rs1p, rs2p)
    def fsd = inst(Cat(ldImm >> 5, rs2p, rs1p, UInt(3,3), ldImm(4,0), UInt(0x27,7)), rs2p, rs1p, rs2p)
    def fsw = {
      if (p(XLen) == 32) inst(Cat(lwImm >> 5, rs2p, rs1p, UInt(2,3), lwImm(4,0), UInt(0x27,7)), rs2p, rs1p, rs2p)
      else sd
    }
    Seq(addi4spn, fld, lw, flw, unimp, fsd, sw, fsw)
  }

  def q1 = {
    def addi = inst(Cat(addiImm, rd, UInt(0,3), rd, UInt(0x13,7)), rd, rd, rs2p)
    def addiw = {
      val opc = Mux(rd.orR, UInt(0x1B,7), UInt(0x1F,7))
      inst(Cat(addiImm, rd, UInt(0,3), rd, opc), rd, rd, rs2p)
    }
    def jal = {
      if (p(XLen) == 32) inst(Cat(jImm(20), jImm(10,1), jImm(11), jImm(19,12), ra, UInt(0x6F,7)), ra, rd, rs2p)
      else addiw
    }
    def li = inst(Cat(addiImm, x0, UInt(0,3), rd, UInt(0x13,7)), rd, x0, rs2p)
    def addi16sp = {
      val opc = Mux(addiImm.orR, UInt(0x13,7), UInt(0x1F,7))
      inst(Cat(addi16spImm, rd, UInt(0,3), rd, opc), rd, rd, rs2p)
    }
    def lui = {
      val opc = Mux(addiImm.orR, UInt(0x37,7), UInt(0x3F,7))
      val me = inst(Cat(luiImm(31,12), rd, opc), rd, rd, rs2p)
      Mux(rd === x0 || rd === sp, addi16sp, me)
    }
    def j = inst(Cat(jImm(20), jImm(10,1), jImm(11), jImm(19,12), x0, UInt(0x6F,7)), x0, rs1p, rs2p)
    def beqz = inst(Cat(bImm(12), bImm(10,5), x0, rs1p, UInt(0,3), bImm(4,1), bImm(11), UInt(0x63,7)), rs1p, rs1p, x0)
    def bnez = inst(Cat(bImm(12), bImm(10,5), x0, rs1p, UInt(1,3), bImm(4,1), bImm(11), UInt(0x63,7)), x0, rs1p, x0)
    def arith = {
      def srli = Cat(shamt, rs1p, UInt(5,3), rs1p, UInt(0x13,7))
      def srai = srli | UInt(1 << 30)
      def andi = Cat(addiImm, rs1p, UInt(7,3), rs1p, UInt(0x13,7))
      def rtype = {
        val funct = Seq(0.U, 4.U, 6.U, 7.U, 0.U, 0.U, 2.U, 3.U)(Cat(x(12), x(6,5)))
        val sub = Mux(x(6,5) === UInt(0), UInt(1 << 30), UInt(0))
        val opc = Mux(x(12), UInt(0x3B,7), UInt(0x33,7))
        Cat(rs2p, rs1p, funct, rs1p, opc) | sub
      }
      inst(Seq(srli, srai, andi, rtype)(x(11,10)), rs1p, rs1p, rs2p)
    }
    Seq(addi, jal, li, lui, arith, j, beqz, bnez)
  }

  def q2 = {
    def slli = inst(Cat(shamt, rd, UInt(1,3), rd, UInt(0x13,7)), rd, rd, rs2)
    def ldsp = inst(Cat(ldspImm, sp, UInt(3,3), rd, UInt(0x03,7)), rd, sp, rs2)
    def lwsp = inst(Cat(lwspImm, sp, UInt(2,3), rd, UInt(0x03,7)), rd, sp, rs2)
    def fldsp = inst(Cat(ldspImm, sp, UInt(3,3), rd, UInt(0x07,7)), rd, sp, rs2)
    def flwsp = {
      if (p(XLen) == 32) inst(Cat(lwspImm, sp, UInt(2,3), rd, UInt(0x07,7)), rd, sp, rs2)
      else ldsp
    }
    def sdsp = inst(Cat(sdspImm >> 5, rs2, sp, UInt(3,3), sdspImm(4,0), UInt(0x23,7)), rd, sp, rs2)
    def swsp = inst(Cat(swspImm >> 5, rs2, sp, UInt(2,3), swspImm(4,0), UInt(0x23,7)), rd, sp, rs2)
    def fsdsp = inst(Cat(sdspImm >> 5, rs2, sp, UInt(3,3), sdspImm(4,0), UInt(0x27,7)), rd, sp, rs2)
    def fswsp = {
      if (p(XLen) == 32) inst(Cat(swspImm >> 5, rs2, sp, UInt(2,3), swspImm(4,0), UInt(0x27,7)), rd, sp, rs2)
      else sdsp
    }
    def jalr = {
      val mv = inst(Cat(rs2, x0, UInt(0,3), rd, UInt(0x33,7)), rd, x0, rs2)
      val add = inst(Cat(rs2, rd, UInt(0,3), rd, UInt(0x33,7)), rd, rd, rs2)
      val jr = Cat(rs2, rd, UInt(0,3), x0, UInt(0x67,7))
      val reserved = Cat(jr >> 7, UInt(0x1F,7))
      val jr_reserved = inst(Mux(rd.orR, jr, reserved), x0, rd, rs2)
      val jr_mv = Mux(rs2.orR, mv, jr_reserved)
      val jalr = Cat(rs2, rd, UInt(0,3), ra, UInt(0x67,7))
      val ebreak = Cat(jr >> 7, UInt(0x73,7)) | UInt(1 << 20)
      val jalr_ebreak = inst(Mux(rd.orR, jalr, ebreak), ra, rd, rs2)
      val jalr_add = Mux(rs2.orR, add, jalr_ebreak)
      Mux(x(12), jalr_add, jr_mv)
    }
    Seq(slli, fldsp, lwsp, flwsp, jalr, fsdsp, swsp, fswsp)
  }

  def q3 = Seq.fill(8)(passthrough)

  def passthrough = inst(x)

  def decode = {
    val s = q0 ++ q1 ++ q2 ++ q3
    s(Cat(x(1,0), x(15,13)))
  }
}

class RVCExpander(implicit p: Parameters) extends Module {
  val io = new Bundle {
    val in = UInt(INPUT, 32)
    val out = new ExpandedInstruction
    val rvc = Bool(OUTPUT)
  }

  if (p(UseCompressed)) {
    io.rvc := io.in(1,0) =/= UInt(3)
    io.out := new RVCDecoder(io.in).decode
  } else {
    io.rvc := Bool(false)
    io.out := new RVCDecoder(io.in).passthrough
  }
}
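Note: RVCDecoder picks one of 32 expansion rules with s(Cat(x(1,0), x(15,13))); bits 1:0 select the compressed quadrant (q0 through q3) and bits 15:13 select the row within it, with quadrant 3 passing 32-bit instructions through unchanged. A sketch (not part of the commit) of how a consumer wires up RVCExpander, mirroring the IBuf usage earlier in this commit; the wrapper module and its port names are hypothetical and assume the same imports as this file:

  class ExpandOne(implicit p: Parameters) extends Module {
    val io = new Bundle {
      val raw  = UInt(INPUT, 32)               // fetch window; low 16 bits may be an RVC parcel
      val inst = new ExpandedInstruction().asOutput
      val rvc  = Bool(OUTPUT)
    }
    val exp = Module(new RVCExpander)
    exp.io.in := io.raw                        // feed the raw bits
    io.inst := exp.io.out                      // 32-bit expansion plus rd/rs1/rs2/rs3 fields
    io.rvc := exp.io.rvc                       // asserted when raw(1,0) =/= 3 and RVC is enabled
  }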


@@ -186,7 +186,7 @@ class BaseConfig extends Config (
      case RoccNPTWPorts => site(BuildRoCC).map(_.nPTWPorts).foldLeft(0)(_ + _)
      case RoccNCSRs => site(BuildRoCC).map(_.csrs.size).foldLeft(0)(_ + _)
      //Rocket Core Constants
-      case FetchWidth => 1
+      case FetchWidth => if (site(UseCompressed)) 2 else 1
      case RetireWidth => 1
      case UseVM => true
      case UseUser => true
@@ -216,6 +216,11 @@ class BaseConfig extends Config (
        TestGeneration.addSuites(env.map(if (site(XLen) == 64) rv64ua else rv32ua))
        true
      }
+      case UseCompressed => {
+        val env = if(site(UseVM)) List("p","v") else List("p")
+        TestGeneration.addSuites(env.map(if (site(XLen) == 64) rv64uc else rv32uc))
+        true
+      }
      case NExtInterrupts => 2
      case AsyncMMIOChannels => false
      case ExtMMIOPorts => AddrMap()
@@ -234,7 +239,7 @@ class BaseConfig extends Config (
      case FDivSqrt => true
      case SFMALatency => 2
      case DFMALatency => 3
-      case CoreInstBits => 32
+      case CoreInstBits => if (site(UseCompressed)) 16 else 32
      case CoreDataBits => site(XLen)
      case NCustomMRWCSRs => 0
      case ResetVector => BigInt(0x1000)


@@ -99,6 +99,9 @@ object DefaultTestSuites {
     "slt", "slti", "sra", "srai", "srl", "srli", "sub", "xor", "xori")
   val rv32ui = new AssemblyTestSuite("rv32ui", rv32uiNames)(_)
+  val rv32ucNames = LinkedHashSet("rvc")
+  val rv32uc = new AssemblyTestSuite("rv32uc", rv32ucNames)(_)
   val rv32umNames = LinkedHashSet("mul", "mulh", "mulhsu", "mulhu", "div", "divu", "rem", "remu")
   val rv32um = new AssemblyTestSuite("rv32um", rv32umNames)(_)
@@ -124,6 +127,9 @@ object DefaultTestSuites {
   val rv64uaNames = rv32uaNames.map(_.replaceAll("_w","_d"))
   val rv64ua = new AssemblyTestSuite("rv64ua", rv32uaNames ++ rv64uaNames)(_)
+  val rv64ucNames = rv32ucNames
+  val rv64uc = new AssemblyTestSuite("rv64uc", rv64ucNames)(_)
   val rv64ufNames = LinkedHashSet("ldst", "move", "fsgnj", "fcmp", "fcvt", "fcvt_w", "fclass", "fadd", "fdiv", "fmin", "fmadd")
   val rv64uf = new AssemblyTestSuite("rv64uf", rv64ufNames)(_)
   val rv64ufNoDiv = new AssemblyTestSuite("rv64uf", rv64ufNames - "fdiv")(_)
@@ -148,10 +154,10 @@ object DefaultTestSuites {
   val rv64i = List(rv64ui, rv64si, rv64mi)
   val rv64pi = List(rv64ui, rv64mi)
-  val benchmarks = new BenchmarkTestSuite("basic", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet(
+  val benchmarks = new BenchmarkTestSuite("rvi", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet(
     "median", "multiply", "qsort", "towers", "vvadd", "dhrystone", "mt-matmul"))
-  val rv32udBenchmarks = new BenchmarkTestSuite("basic", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet(
+  val rv32udBenchmarks = new BenchmarkTestSuite("rvd", "$(RISCV)/riscv64-unknown-elf/share/riscv-tests/benchmarks", LinkedHashSet(
     "mm", "spmv", "mt-vvadd"))
   val emptyBmarks = new BenchmarkTestSuite("empty",