commit 9bb0d92381
Merge branch 'master' into async_queue_option
@@ -86,6 +86,15 @@ class BHT(nbht: Int)(implicit val p: Parameters) extends HasCoreParameters {
   val history = Reg(UInt(width = nbhtbits))
 }
 
+object CFIType {
+  def SZ = 2
+  def apply() = UInt(width = SZ)
+  def branch = 0.U
+  def jump = 1.U
+  def call = 2.U
+  def ret = 3.U
+}
+
 // BTB update occurs during branch resolution (and only on a mispredict).
 // - "pc" is what future fetch PCs will tag match against.
 // - "br_pc" is the PC of the branch instruction.
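(The "pc" field, as the Rocket hunk further down computes it, is br_pc rounded down to its fetch packet: ~(~br_pc | (coreInstBytes*fetchWidth-1)) is the x & ~m idiom, so with an 8-byte fetch packet a branch at 0x1006 is tagged under 0x1000. That way any future fetch of the packet, not just one starting at the branch itself, can hit the entry.)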
@@ -95,9 +104,8 @@ class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) {
   val target = UInt(width = vaddrBits)
   val taken = Bool()
   val isValid = Bool()
-  val isJump = Bool()
-  val isReturn = Bool()
   val br_pc = UInt(width = vaddrBits)
+  val cfiType = CFIType()
 }
@@ -110,8 +118,7 @@ class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) {
 }
 
 class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) {
-  val isCall = Bool()
-  val isReturn = Bool()
+  val cfiType = CFIType()
   val returnAddr = UInt(width = vaddrBits)
   val prediction = Valid(new BTBResp)
 }
@@ -121,6 +128,7 @@ class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) {
 // - "mask" provides a mask of valid instructions (instructions are
 //   masked off by the predicted taken branch from the BTB).
 class BTBResp(implicit p: Parameters) extends BtbBundle()(p) {
+  val cfiType = CFIType()
   val taken = Bool()
   val mask = Bits(width = fetchWidth)
   val bridx = Bits(width = log2Up(fetchWidth))
@@ -154,8 +162,7 @@ class BTB(implicit p: Parameters) extends BtbModule {
   val pageValid = Reg(init = UInt(0, nPages))
 
   val isValid = Reg(init = UInt(0, entries))
-  val isReturn = Reg(UInt(width = entries))
-  val isJump = Reg(UInt(width = entries))
+  val cfiType = Reg(Vec(entries, CFIType()))
   val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth))))
 
   private def page(addr: UInt) = addr >> matchBits
@@ -210,9 +217,8 @@ class BTB(implicit p: Parameters) extends BtbModule {
     tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes))
     idxPages(waddr) := idxPageUpdate +& 1 // the +1 corresponds to the <<1 on io.resp.valid
     tgtPages(waddr) := tgtPageUpdate
+    cfiType(waddr) := r_btb_update.bits.cfiType
     isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask)
-    isReturn := Mux(r_btb_update.bits.isReturn, isReturn | mask, isReturn & ~mask)
-    isJump := Mux(r_btb_update.bits.isJump, isJump | mask, isJump & ~mask)
     if (fetchWidth > 1)
       brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes)
@@ -236,6 +242,7 @@ class BTB(implicit p: Parameters) extends BtbModule {
   io.resp.bits.entry := OHToUInt(idxHit)
   io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(idxHit, brIdx) else UInt(0))
   io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))
+  io.resp.bits.cfiType := Mux1H(idxHit, cfiType)
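(A worked example for the mask expression, assuming fetchWidth = 4 and thus a 2-bit bridx: for a predicted-taken branch at bridx = 1, the Mux selects ~bridx = 0b10, the outer ~ gives 0b01, and (1 << 0b01) - 1 = 0b001, so Cat(0b001, 1) = 0b0011 keeps only the branch and the instruction before it. When not taken, the outer ~ of 0 is 0b11, (1 << 0b11) - 1 = 0b0111, and the Cat yields 0b1111, all instructions valid.)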
 
   // if multiple entries for same PC land in BTB, zap them
   when (PopCountAtLeast(idxHit, 2)) {
@@ -244,7 +251,7 @@ class BTB(implicit p: Parameters) extends BtbModule {
 
   if (nBHT > 0) {
     val bht = new BHT(nBHT)
-    val isBranch = !(idxHit & isJump).orR
+    val isBranch = (idxHit & cfiType.map(_ === CFIType.branch).asUInt).orR
     val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
     val update_btb_hit = io.bht_update.bits.prediction.valid
     when (io.bht_update.valid && update_btb_hit) {
@@ -256,17 +263,14 @@ class BTB(implicit p: Parameters) extends BtbModule {
 
   if (nRAS > 0) {
     val ras = new RAS(nRAS)
-    val doPeek = (idxHit & isReturn).orR
+    val doPeek = (idxHit & cfiType.map(_ === CFIType.ret).asUInt).orR
     when (!ras.isEmpty && doPeek) {
       io.resp.bits.target := ras.peek
     }
     when (io.ras_update.valid) {
-      when (io.ras_update.bits.isCall) {
+      when (io.ras_update.bits.cfiType === CFIType.call) {
         ras.push(io.ras_update.bits.returnAddr)
-        when (doPeek) {
-          io.resp.bits.target := io.ras_update.bits.returnAddr
-        }
-      }.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) {
+      }.elsewhen (io.ras_update.bits.cfiType === CFIType.ret && io.ras_update.bits.prediction.valid) {
         ras.pop()
       }
     }
@@ -300,10 +300,10 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Parameters)
   val pending_interrupts = read_mip & reg_mie
   val m_interrupts = Mux(reg_mstatus.prv <= PRV.S || (reg_mstatus.prv === PRV.M && reg_mstatus.mie), pending_interrupts & ~reg_mideleg, UInt(0))
   val s_interrupts = Mux(m_interrupts === 0 && (reg_mstatus.prv < PRV.S || (reg_mstatus.prv === PRV.S && reg_mstatus.sie)), pending_interrupts & reg_mideleg, UInt(0))
-  val all_interrupts = m_interrupts | s_interrupts
+  val (anyInterrupt, whichInterrupt) = chooseInterrupt(Seq(s_interrupts, m_interrupts))
   val interruptMSB = BigInt(1) << (xLen-1)
-  val interruptCause = UInt(interruptMSB) + PriorityEncoder(all_interrupts)
-  io.interrupt := all_interrupts.orR && !reg_debug && !io.singleStep || reg_singleStepped
+  val interruptCause = UInt(interruptMSB) + whichInterrupt
+  io.interrupt := anyInterrupt && !reg_debug && !io.singleStep || reg_singleStepped
   io.interrupt_cause := interruptCause
   io.bp := reg_bp take nBreakpoints
   io.pmp := reg_pmp.map(PMP(_))
@@ -758,6 +758,14 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Parameters)
     }
   }
 
+  def chooseInterrupt(masks: Seq[UInt]) = {
+    // we can't simply choose the highest-numbered interrupt, because timer
+    // interrupts are in the wrong place in mip.
+    val timerMask = UInt(0xF0, xLen)
+    val masked = masks.map(m => Cat(m.padTo(xLen) & ~timerMask, m.padTo(xLen) & timerMask))
+    (masks.map(_.orR).reduce(_||_), Log2(masked.asUInt)(log2Ceil(xLen)-1, 0))
+  }
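(How this works, assuming xLen = 64: timerMask covers mip bits 4 through 7, so the Cat moves each mask's timer bits into the low 64 bits and everything else into the high 64 bits. Log2 returns the index of the most significant set bit, so any non-timer interrupt outranks a timer one, and the low log2Ceil(xLen) bits of that index recover the original interrupt number. With MEIP (bit 11) and MTIP (bit 7) both pending, the external interrupt sits at bit 64+11 = 75, Log2 returns 75, and 75 mod 64 = 11; with only MTIP pending the result is 7. Because masked.asUInt places m_interrupts, the last element of Seq(s_interrupts, m_interrupts), in the uppermost bits, M-mode interrupts also take precedence over delegated S-mode ones.)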
 
   def readModifyWriteCSR(cmd: UInt, rdata: UInt, wdata: UInt) =
     (Mux(cmd.isOneOf(CSR.S, CSR.C), rdata, UInt(0)) | wdata) & ~Mux(cmd === CSR.C, wdata, UInt(0))
@@ -11,6 +11,7 @@ import uncore.tilelink2._
 import uncore.util._
 import util._
 import TLMessages._
+import scala.math.min
 
 class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
   val addr = Bits(width = untagBits)
@@ -57,6 +58,19 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   data.io.req <> dataArb.io.out
   dataArb.io.out.ready := true
 
+  val rational = p(coreplex.RocketCrossing) match {
+    case coreplex.RationalCrossing(_) => true
+    case _ => false
+  }
+
+  val tl_out_a = Wire(tl_out.a)
+  val q_depth = if (rational) min(2, maxUncachedInFlight-1) else 0
+  if (q_depth <= 0) {
+    tl_out.a <> tl_out_a
+  } else {
+    tl_out.a <> Queue(tl_out_a, q_depth, flow = true, pipe = true)
+  }
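(The tl_out_a wire stages all of the cache's A-channel traffic; the two-entry flow/pipe Queue, enabled only under a RationalCrossing, presumably adds the elasticity the crossing needs without stalling the pipeline. Capping q_depth at maxUncachedInFlight-1 keeps the number of buffered uncached requests within what the uncachedInFlight trackers can account for, and with q_depth of 0 the wire collapses to a plain connection.)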
+
   val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
   val s1_probe = Reg(next=tl_out.b.fire(), init=Bool(false))
   val probe_bits = RegEnable(tl_out.b.bits, tl_out.b.fire()) // TODO has data now :(
@@ -176,7 +190,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val (s2_prb_ack_data, s2_report_param, probeNewCoh) = s2_probe_state.onProbe(probe_bits.param)
   val (s2_victim_dirty, s2_shrink_param, voluntaryNewCoh) = s2_victim_state.onCacheControl(M_FLUSH)
   val s2_update_meta = s2_hit_state =/= s2_new_hit_state
-  io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && tl_out.a.ready && !uncachedInFlight.asUInt.andR)
+  io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && tl_out_a.ready && !uncachedInFlight.asUInt.andR)
   when (io.cpu.s2_nack || (s2_valid_hit && s2_update_meta)) { s1_nack := true }
 
   val s3_valid = Reg(next = s2_valid, init=Bool(false))
@@ -285,17 +299,17 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
       M_XA_MAXU -> edge.Arithmetic(a_source, access_address, a_size, a_data, TLAtomics.MAXU)._2))
   } else {
     // If no managers support atomics, assert fail if processor asks for them
-    assert (!(tl_out.a.valid && pstore1_amo && s2_write && s2_uncached))
+    assert (!(tl_out_a.valid && pstore1_amo && s2_write && s2_uncached))
     Wire(new TLBundleA(edge.bundle))
   }
 
-  tl_out.a.valid := (s2_valid_cached_miss && !s2_victim_dirty) ||
+  tl_out_a.valid := (s2_valid_cached_miss && !s2_victim_dirty) ||
                     (s2_valid_uncached && !uncachedInFlight.asUInt.andR)
-  tl_out.a.bits := Mux(!s2_uncached, acquire, Mux(!s2_write, get, Mux(!pstore1_amo, put, atomics)))
+  tl_out_a.bits := Mux(!s2_uncached, acquire, Mux(!s2_write, get, Mux(!pstore1_amo, put, atomics)))
 
   // Set pending bits for outstanding TileLink transaction
   val a_sel = UIntToOH(a_source, maxUncachedInFlight+mmioOffset) >> mmioOffset
-  when (tl_out.a.fire()) {
+  when (tl_out_a.fire()) {
     when (s2_uncached) {
       (a_sel.toBools zip (uncachedInFlight zip uncachedReqs)) foreach { case (s, (f, r)) =>
         when (s) {
@@ -518,7 +532,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   val flushed = Reg(init=Bool(true))
   val flushing = Reg(init=Bool(false))
   val flushCounter = Counter(nSets * nWays)
-  when (tl_out.a.fire() && !s2_uncached) { flushed := false }
+  when (tl_out_a.fire() && !s2_uncached) { flushed := false }
   when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
     io.cpu.s2_nack := !flushed
     when (!flushed) {
@@ -542,6 +556,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
   }
 
   // performance events
-  io.cpu.acquire := edge.done(tl_out.a)
+  io.cpu.acquire := edge.done(tl_out_a)
   io.cpu.release := edge.done(tl_out.c)
 }
@@ -5,6 +5,7 @@ package rocket
 
 import Chisel._
 import Chisel.ImplicitConversions._
+import chisel3.core.withReset
 import config._
 import coreplex._
 import diplomacy._
@@ -64,11 +65,12 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
   val icache = outer.icache.module
 
   val tlb = Module(new TLB(log2Ceil(coreInstBytes*fetchWidth), nTLBEntries))
+  val fq = withReset(reset || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 3, flow = true)) }
 
+  val s0_valid = io.cpu.req.valid || fq.io.enq.ready
   val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
   val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBs (this propagates down the pipeline)
   val s1_speculative = Reg(Bool())
-  val s1_same_block = Reg(Bool())
   val s2_valid = Reg(init=Bool(true))
   val s2_pc = Reg(init=io.resetVector)
   val s2_btb_resp_valid = Reg(init=Bool(false))
@@ -82,39 +84,33 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
   val s2_speculative = Reg(init=Bool(false))
   val s2_cacheable = Reg(init=Bool(false))
 
-  val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
-  val ntpc_same_block = (ntpc & rowBytes) === (s1_pc & rowBytes)
+  val fetchBytes = coreInstBytes * fetchWidth
+  val s1_base_pc = ~(~s1_pc | (fetchBytes - 1))
+  val ntpc = s1_base_pc + fetchBytes.U
   val predicted_npc = Wire(init = ntpc)
   val predicted_taken = Wire(init = Bool(false))
-  val icmiss = s2_valid && !icache.io.resp.valid
-  val npc = Mux(icmiss, s2_pc, predicted_npc)
-  val s0_same_block = !predicted_taken && !icmiss && !io.cpu.req.valid && ntpc_same_block
 
-  val stall = io.cpu.resp.valid && !io.cpu.resp.ready
-  when (!stall) {
-    s1_same_block := s0_same_block && !tlb.io.resp.miss
-    s1_pc_ := io.cpu.npc
-    // consider RVC fetches across blocks to be non-speculative if the first
-    // part was non-speculative
-    val s0_speculative =
-      if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken
-      else Bool(true)
-    s1_speculative := Mux(icmiss, s2_speculative, s0_speculative)
-    s2_valid := !icmiss
-    when (!icmiss) {
-      s2_pc := s1_pc
-      s2_speculative := s1_speculative
-      s2_cacheable := tlb.io.resp.cacheable
-      s2_maybe_pf := tlb.io.resp.pf.inst
-      s2_maybe_ae := tlb.io.resp.ae.inst
-      s2_tlb_miss := tlb.io.resp.miss
-    }
-  }
-  when (io.cpu.req.valid) {
-    s1_same_block := Bool(false)
-    s1_pc_ := io.cpu.npc
-    s1_speculative := io.cpu.req.bits.speculative
-    s2_valid := Bool(false)
+  val s2_replay = Wire(Bool())
+  s2_replay := (s2_valid && !fq.io.enq.fire()) || RegNext(s2_replay && !s0_valid)
+  val npc = Mux(s2_replay, s2_pc, predicted_npc)
+
+  s1_pc_ := io.cpu.npc
+  // consider RVC fetches across blocks to be non-speculative if the first
+  // part was non-speculative
+  val s0_speculative =
+    if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken
+    else Bool(true)
+  s1_speculative := Mux(io.cpu.req.valid, io.cpu.req.bits.speculative, Mux(s2_replay, s2_speculative, s0_speculative))
+
+  s2_valid := false
+  when (!s2_replay && !io.cpu.req.valid) {
+    s2_valid := true
+    s2_pc := s1_pc
+    s2_speculative := s1_speculative
+    s2_cacheable := tlb.io.resp.cacheable
+    s2_maybe_pf := tlb.io.resp.pf.inst
+    s2_maybe_ae := tlb.io.resp.ae.inst
+    s2_tlb_miss := tlb.io.resp.miss
   }
 
   if (usingBTB) {
@@ -124,7 +120,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
     btb.io.btb_update := io.cpu.btb_update
     btb.io.bht_update := io.cpu.bht_update
     btb.io.ras_update := io.cpu.ras_update
-    when (!stall && !icmiss) {
+    when (!s2_replay) {
       btb.io.req.valid := true
       s2_btb_resp_valid := btb.io.resp.valid
       s2_btb_resp_bits := btb.io.resp.bits
@@ -133,10 +129,18 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
       predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended)
       predicted_taken := Bool(true)
     }
+
+    // push RAS speculatively
+    btb.io.ras_update.valid := btb.io.req.valid && btb.io.resp.valid && btb.io.resp.bits.cfiType.isOneOf(CFIType.call, CFIType.ret)
+    val returnAddrLSBs = btb.io.resp.bits.bridx +& 1
+    btb.io.ras_update.bits.returnAddr :=
+      Mux(returnAddrLSBs(log2Ceil(fetchWidth)), ntpc, s1_base_pc | ((returnAddrLSBs << log2Ceil(coreInstBytes)) & (fetchBytes - 1)))
+    btb.io.ras_update.bits.cfiType := btb.io.resp.bits.cfiType
+    btb.io.ras_update.bits.prediction.valid := true
   }
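(The return address is rebuilt from the prediction, on the reading that returnAddrLSBs = bridx +& 1 indexes the instruction after the call within the fetch packet: if the carry sets bit log2Ceil(fetchWidth), the return address spills into the next packet and ntpc is used; otherwise it is s1_base_pc plus the in-packet byte offset. For example, with fetchWidth = 4 and coreInstBytes = 2, a call at bridx = 3 gives returnAddrLSBs = 0b100 and returnAddr = ntpc, while bridx = 1 gives s1_base_pc | 4.)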
 
   io.ptw <> tlb.io.ptw
-  tlb.io.req.valid := !stall && !icmiss
+  tlb.io.req.valid := !s2_replay
   tlb.io.req.bits.vaddr := s1_pc
   tlb.io.req.bits.passthrough := Bool(false)
   tlb.io.req.bits.instruction := Bool(true)
@@ -144,27 +148,26 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
   tlb.io.req.bits.sfence := io.cpu.sfence
   tlb.io.req.bits.size := log2Ceil(coreInstBytes*fetchWidth)
 
-  icache.io.req.valid := !stall && !s0_same_block
+  icache.io.req.valid := s0_valid
   icache.io.req.bits.addr := io.cpu.npc
   icache.io.invalidate := io.cpu.flush_icache
   icache.io.s1_paddr := tlb.io.resp.paddr
-  icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || s1_speculative && !tlb.io.resp.cacheable || tlb.io.resp.pf.inst || tlb.io.resp.ae.inst
-  icache.io.s2_kill := false
-  icache.io.resp.ready := !stall && !s1_same_block
+  icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || s2_replay
+  icache.io.s2_kill := s2_speculative && !s2_cacheable || s2_xcpt
 
-  val s2_kill = s2_speculative && !s2_cacheable || s2_xcpt
-  io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || s2_kill)
-  io.cpu.resp.bits.pc := s2_pc
+  fq.io.enq.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill)
+  fq.io.enq.bits.pc := s2_pc
   io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
 
   require(fetchWidth * coreInstBytes <= rowBytes && isPow2(fetchWidth))
-  io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc.extract(log2Ceil(rowBytes)-1,log2Ceil(fetchWidth*coreInstBytes)) << log2Ceil(fetchWidth*coreInstBits))
-  io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
-  io.cpu.resp.bits.pf := s2_pf
-  io.cpu.resp.bits.ae := s2_ae
-  io.cpu.resp.bits.replay := s2_kill && !icache.io.resp.valid && !s2_xcpt
-  io.cpu.resp.bits.btb.valid := s2_btb_resp_valid
-  io.cpu.resp.bits.btb.bits := s2_btb_resp_bits
+  fq.io.enq.bits.data := icache.io.resp.bits
+  fq.io.enq.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
+  fq.io.enq.bits.pf := s2_pf
+  fq.io.enq.bits.ae := s2_ae
+  fq.io.enq.bits.replay := icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt
+  fq.io.enq.bits.btb.valid := s2_btb_resp_valid
+  fq.io.enq.bits.btb.bits := s2_btb_resp_bits
+
+  io.cpu.resp <> fq.io.deq
 
   // performance events
   io.cpu.acquire := edge.done(icache.io.mem(0).a)
@@ -84,7 +84,7 @@ class IBuf(implicit p: Parameters) extends CoreModule {
   val ae = valid & (Mux(buf.ae, bufMask, UInt(0)) | Mux(io.imem.bits.ae, ~bufMask, UInt(0)))
   val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0)))
   val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0))
-  assert(!io.imem.bits.btb.valid || io.imem.bits.btb.bits.bridx >= pcWordBits)
+  assert(!io.imem.valid || !io.imem.bits.btb.valid || io.imem.bits.btb.bits.bridx >= pcWordBits)
   val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0))
   val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask
@@ -47,7 +47,7 @@ class ICacheBundle(outer: ICache) extends CoreBundle()(outer.p) {
   val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
   val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
 
-  val resp = Decoupled(new ICacheResp)
+  val resp = Valid(UInt(width = coreInstBits * fetchWidth))
   val invalidate = Bool(INPUT)
   val mem = outer.node.bundleOut
 }
@@ -65,7 +65,6 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
   val s_ready :: s_request :: s_refill :: Nil = Enum(UInt(), 3)
   val state = Reg(init=s_ready)
   val invalidated = Reg(Bool())
-  val stall = !io.resp.ready
 
   val refill_addr = Reg(UInt(width = paddrBits))
   val s1_tag_hit = Wire(Vec(nWays, Bool()))
@@ -78,10 +77,10 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
   val s1_hit = out_valid && s1_any_tag_hit
   val s1_miss = s1_valid && state === s_ready && !s1_any_tag_hit
 
-  val s0_valid = io.req.valid && state === s_ready && !(s1_valid && stall)
+  val s0_valid = io.req.valid && state === s_ready
   val s0_vaddr = io.req.bits.addr
 
-  s1_valid := s0_valid || out_valid && stall
+  s1_valid := s0_valid
 
   when (s1_miss) { refill_addr := io.s1_paddr }
   val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
@@ -110,16 +109,34 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
   }
 
   val s1_tag_disparity = Wire(Vec(nWays, Bool()))
-  val s1_dout = Wire(Vec(nWays, UInt(width = code.width(rowBits))))
+  val wordBits = coreInstBits * fetchWidth
+  val s1_dout = Wire(Vec(nWays, UInt(width = code.width(wordBits))))
   val s1_dout_valid = RegNext(s0_valid)
 
   for (i <- 0 until nWays) {
     val s1_vb = vb_array(Cat(UInt(i), io.s1_paddr(untagBits-1,blockOffBits))).toBool
-    s1_tag_disparity(i) := (code.decode(tag_rdata(i)).error holdUnless s1_dout_valid)
-    s1_tag_hit(i) := s1_vb && ((code.decode(tag_rdata(i)).uncorrected === s1_tag) holdUnless s1_dout_valid)
+    s1_tag_disparity(i) := code.decode(tag_rdata(i)).error
+    s1_tag_hit(i) := s1_vb && code.decode(tag_rdata(i)).uncorrected === s1_tag
   }
 
-  val data_arrays = Seq.fill(nWays) { SeqMem(nSets * refillCycles, Bits(width = code.width(rowBits))) }
+  require(rowBits % wordBits == 0)
+  val data_arrays = Seq.fill(rowBits / wordBits) { SeqMem(nSets * refillCycles, Vec(nWays, UInt(width = code.width(wordBits)))) }
+  for ((data_array, i) <- data_arrays zipWithIndex) {
+    val wen = tl_out.d.valid
+    when (wen) {
+      val idx = (refill_idx << log2Ceil(refillCycles)) | refill_cnt
+      val data = tl_out.d.bits.data(wordBits*(i+1)-1, wordBits*i)
+      data_array.write(idx, Vec.fill(nWays)(code.encode(data)), (0 until nWays).map(repl_way === _))
+    }
+    def wordMatch(addr: UInt) = addr.extract(log2Ceil(rowBytes)-1, log2Ceil(wordBits/8)) === i
+    val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
+    val dout = data_array.read(s0_raddr, !wen && (s0_valid && wordMatch(s0_vaddr)))
+    when (wordMatch(io.s1_paddr)) {
+      s1_dout := dout
+    }
+  }
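(After this restructuring the instruction data memory is banked by word rather than by way: there are rowBits/wordBits banks, each a SeqMem whose entries hold all nWays ways as a Vec, and wordMatch enables only the bank containing the requested word. A fetch therefore reads wordBits bits per way instead of a full rowBits row per way, and the way multiplexing over s1_dout happens after the read. Assuming rowBits = 128 and wordBits = 32, for instance, there are four banks and two address bits pick the active one.)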
 
   /*
   for ((data_array, i) <- data_arrays zipWithIndex) {
     val wen = tl_out.d.valid && repl_way === UInt(i)
     when (wen) {
@@ -127,28 +144,29 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
       data_array.write((refill_idx << log2Ceil(refillCycles)) | refill_cnt, e_d)
     }
     val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
-    s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid) holdUnless s1_dout_valid
+    s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid)
   }
   */
 
   // output signals
   outer.latency match {
     case 1 =>
       require(code.width(rowBits) == rowBits) // no ECC
-      io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
+      io.resp.bits := Mux1H(s1_tag_hit, s1_dout)
       io.resp.valid := s1_hit
     case 2 =>
-      val s2_valid = RegEnable(out_valid, Bool(false), !stall)
-      val s2_hit = RegEnable(s1_hit, Bool(false), !stall)
-      val s2_tag_hit = RegEnable(s1_tag_hit, !stall)
-      val s2_dout = RegEnable(s1_dout, !stall)
+      val s2_valid = RegNext(out_valid, Bool(false))
+      val s2_hit = RegNext(s1_hit, Bool(false))
+      val s2_tag_hit = RegEnable(s1_tag_hit, s1_valid)
+      val s2_dout = RegEnable(s1_dout, s1_valid)
       val s2_way_mux = Mux1H(s2_tag_hit, s2_dout)
 
-      val s2_tag_disparity = RegEnable(s1_tag_disparity, !stall).asUInt.orR
+      val s2_tag_disparity = RegEnable(s1_tag_disparity, s1_valid).asUInt.orR
       val s2_data_disparity = code.decode(s2_way_mux).error
       val s2_disparity = s2_tag_disparity || s2_data_disparity
       when (s2_valid && s2_disparity) { invalidate := true }
 
-      io.resp.bits.datablock := code.decode(s2_way_mux).uncorrected
+      io.resp.bits := code.decode(s2_way_mux).uncorrected
       io.resp.valid := s2_hit && !s2_disparity
   }
   tl_out.a.valid := state === s_request && !io.s2_kill
@@ -173,7 +173,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
   val ibuf = Module(new IBuf)
   val id_expanded_inst = ibuf.io.inst.map(_.bits.inst)
   val id_inst = id_expanded_inst.map(_.bits)
-  ibuf.io.imem <> (if (usingCompressed) withReset(reset || take_pc) { Queue(io.imem.resp, 1, flow = true) } else io.imem.resp)
+  ibuf.io.imem <> io.imem.resp
   ibuf.io.kill := take_pc
 
   require(decodeWidth == 1 /* TODO */ && retireWidth == decodeWidth)
@@ -520,7 +520,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
 
   val sboard = new Scoreboard(32, true)
   sboard.clear(ll_wen, ll_waddr)
-  val id_sboard_hazard = checkHazards(hazard_targets, sboard.read _)
+  val id_sboard_hazard = checkHazards(hazard_targets, rd => sboard.read(rd) && !(ll_wen && ll_waddr === rd))
   sboard.set(wb_set_sboard && wb_wen, wb_waddr)
 
   // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
@@ -587,8 +587,11 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
 
   io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && (((mem_cfi_taken || !mem_cfi) && mem_wrong_npc) || (Bool(fastJAL) && mem_ctrl.jal && !mem_reg_btb_hit)))
   io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi
-  io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
-  io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00?01")
+  io.imem.btb_update.bits.cfiType :=
+    Mux((mem_ctrl.jal || mem_ctrl.jalr) && mem_waddr(0), CFIType.call,
+    Mux(mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00?01"), CFIType.ret,
+    Mux(mem_ctrl.jal || mem_ctrl.jalr, CFIType.jump,
+    CFIType.branch)))
   io.imem.btb_update.bits.target := io.imem.req.bits.pc
   io.imem.btb_update.bits.br_pc := (if (usingCompressed) mem_reg_pc + Mux(mem_reg_rvc, UInt(0), UInt(2)) else mem_reg_pc)
   io.imem.btb_update.bits.pc := ~(~io.imem.btb_update.bits.br_pc | (coreInstBytes*fetchWidth-1))
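(The cfiType Mux chain encodes the RISC-V return-address-stack convention: a JAL/JALR whose destination register has bit 0 set, which covers the link registers x1 and x5, counts as a call; a JALR whose rs1 field matches BitPat("b00?01"), again x1 or x5, counts as a return; any other JAL/JALR is a plain jump; everything else is a branch. The pc assignment just above rounds br_pc down to its fetch packet via the same ~(~x | m), i.e. x & ~m, idiom used in the frontend.)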
@@ -601,12 +604,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
   io.imem.bht_update.bits.mispredict := mem_wrong_npc
   io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction
 
-  io.imem.ras_update.valid := mem_reg_valid && !take_pc_wb
-  io.imem.ras_update.bits.returnAddr := mem_int_wdata
-  io.imem.ras_update.bits.isCall := io.imem.btb_update.bits.isJump && mem_waddr(0)
-  io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn
-  io.imem.ras_update.bits.prediction := io.imem.btb_update.bits.prediction
-
   io.fpu.valid := !ctrl_killd && id_ctrl.fp
   io.fpu.killx := ctrl_killx
   io.fpu.killm := killm_common
@@ -4,7 +4,7 @@
 package rocketchip
 
 import Chisel._
-import scala.collection.mutable.{LinkedHashSet, ArrayBuffer}
+import scala.collection.mutable.LinkedHashSet
 
 abstract class RocketTestSuite {
   val dir: String
@@ -56,9 +56,9 @@ class RegressionTestSuite(val names: LinkedHashSet[String]) extends RocketTestSuite {
 }
 
 object TestGeneration {
-  private val suites = ArrayBuffer[RocketTestSuite]()
+  private val suites = collection.mutable.ListMap[String, RocketTestSuite]()
 
-  def addSuite(s: RocketTestSuite) { suites += s }
+  def addSuite(s: RocketTestSuite) { suites += (s.makeTargetName -> s) }
 
   def addSuites(s: Seq[RocketTestSuite]) { s.foreach(addSuite) }
@@ -93,7 +93,7 @@ run-$kind-tests-fast: $$(addprefix $$(output_dir)/, $$(addsuffix .run, $targets))
     } else { "\n" }
   }
 
-  suites.groupBy(_.kind).map { case (kind, s) => gen(kind, s) }.mkString("\n")
+  suites.values.toSeq.groupBy(_.kind).map { case (kind, s) => gen(kind, s) }.mkString("\n")
 }
 
}
@@ -450,6 +450,7 @@ class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
       l2s.io.signedIn := ~in.bits.typ(0)
       l2s.io.in := intValue
       l2s.io.roundingMode := in.bits.rm
+      l2s.io.detectTininess := hardfloat.consts.tininess_afterRounding
       mux.data := sanitizeNaN(l2s.io.out, FType.S)
       mux.exc := l2s.io.exceptionFlags
@@ -460,6 +461,7 @@ class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
         l2d.io.signedIn := ~in.bits.typ(0)
         l2d.io.in := intValue
         l2d.io.roundingMode := in.bits.rm
+        l2d.io.detectTininess := hardfloat.consts.tininess_afterRounding
         mux.data := Cat(l2d.io.out >> l2s.io.out.getWidth, l2s.io.out)
         when (!in.bits.singleIn) {
           mux.data := sanitizeNaN(l2d.io.out, FType.D)
@@ -511,11 +513,13 @@ class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
     val d2s = Module(new hardfloat.RecFNToRecFN(dExpWidth, dSigWidth, sExpWidth, sSigWidth))
     d2s.io.in := in.bits.in1
     d2s.io.roundingMode := in.bits.rm
+    d2s.io.detectTininess := hardfloat.consts.tininess_afterRounding
     val d2sOut = sanitizeNaN(d2s.io.out, FType.S)
 
     val s2d = Module(new hardfloat.RecFNToRecFN(sExpWidth, sSigWidth, dExpWidth, dSigWidth))
     s2d.io.in := maxType.unsafeConvert(in.bits.in1, FType.S)
     s2d.io.roundingMode := in.bits.rm
+    s2d.io.detectTininess := hardfloat.consts.tininess_afterRounding
     val s2dOut = sanitizeNaN(s2d.io.out, FType.D)
 
     when (in.bits.singleOut) {
@@ -554,6 +558,7 @@ class FPUFMAPipe(val latency: Int, t: FType)(implicit p: Parameters) extends FPUModule()(p) {
   val fma = Module(new hardfloat.MulAddRecFN(t.exp, t.sig))
   fma.io.op := in.fmaCmd
   fma.io.roundingMode := in.rm
+  fma.io.detectTininess := hardfloat.consts.tininess_afterRounding
   fma.io.a := in.in1
   fma.io.b := in.in2
   fma.io.c := in.in3
@@ -775,6 +780,7 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
     divSqrt.io.a := fpiu.io.out.bits.in.in1
     divSqrt.io.b := fpiu.io.out.bits.in.in2
     divSqrt.io.roundingMode := fpiu.io.out.bits.in.rm
+    divSqrt.io.detectTininess := hardfloat.consts.tininess_afterRounding
 
     when (divSqrt.io.inValid && divSqrt_inReady) {
       divSqrt_in_flight := true
@@ -794,6 +800,7 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
       val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
       divSqrt_toSingle.io.in := divSqrt_wdata_double
      divSqrt_toSingle.io.roundingMode := divSqrt_rm
+      divSqrt_toSingle.io.detectTininess := hardfloat.consts.tininess_afterRounding
      divSqrt_wdata := Mux(divSqrt_single, Cat(divSqrt_wdata_double >> divSqrt_toSingle.io.out.getWidth, sanitizeNaN(divSqrt_toSingle.io.out, FType.S)), divSqrt_wdata_double)
      divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0))
     } else {
@@ -30,7 +30,8 @@ trait HasExternalInterrupts extends HasTileParameters {
   // debug, msip, mtip, meip, seip, lip offsets in CSRs
   def csrIntMap: List[Int] = {
     val nlips = tileParams.core.nLocalInterrupts
-    List(65535, 3, 7, 11, 9) ++ List.tabulate(nlips)(_ + 16)
+    val seip = if (usingVM) Seq(9) else Nil
+    List(65535, 3, 7, 11) ++ seip ++ List.tabulate(nlips)(_ + 16)
   }
 }
@@ -132,7 +132,13 @@ class TLWidthWidget(innerBeatBytes: Int)(implicit p: Parameters) extends LazyModule
     } else if (edgeIn.manager.beatBytes > edgeOut.manager.beatBytes) {
       // split input to output
       val repeat = Wire(Bool())
-      repeat := split(edgeIn, Repeater(in, repeat), edgeOut, out)
+      val repeated = Repeater(in, repeat)
+      val cated = Wire(repeated)
+      cated <> repeated
+      edgeIn.data(cated.bits) := Cat(
+        edgeIn.data(repeated.bits)(edgeIn.manager.beatBytes*8-1, edgeOut.manager.beatBytes*8),
+        edgeIn.data(in.bits)(edgeOut.manager.beatBytes*8-1, 0))
+      repeat := split(edgeIn, cated, edgeOut, out)
     } else {
       // merge input to output
       merge(edgeIn, in, edgeOut, out)
@@ -38,9 +38,17 @@ package object util {
   implicit def wcToUInt(c: WideCounter): UInt = c.value
 
   implicit class UIntToAugmentedUInt(val x: UInt) extends AnyVal {
-    def sextTo(n: Int): UInt =
+    def sextTo(n: Int): UInt = {
+      require(x.getWidth <= n)
       if (x.getWidth == n) x
       else Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x)
+    }
+
+    def padTo(n: Int): UInt = {
+      require(x.getWidth <= n)
+      if (x.getWidth == n) x
+      else Cat(UInt(0, n - x.getWidth), x)
+    }
 
     def extract(hi: Int, lo: Int): UInt = {
       if (hi == lo-1) UInt(0)
src/main/scala/util/ShiftQueue.scala (new file, 61 lines)
@@ -0,0 +1,61 @@
+// See LICENSE.SiFive for license details.
+
+package util
+
+import Chisel._
+
+/** Implements the same interface as chisel3.util.Queue, but uses a shift
+  * register internally. It is less energy efficient whenever the queue
+  * has more than one entry populated, but is faster on the dequeue side.
+  * It is efficient for usually-empty flow-through queues. */
+class ShiftQueue[T <: Data](gen: T,
+                            val entries: Int,
+                            pipe: Boolean = false,
+                            flow: Boolean = false)
+    extends Module {
+  val io = IO(new QueueIO(gen, entries) {
+    val mask = UInt(OUTPUT, entries)
+  })
+
+  private val ram = Mem(entries, gen)
+  private val valid = RegInit(UInt(0, entries))
+  private val elts = Reg(Vec(entries, gen))
+
+  private val do_enq = Wire(init=io.enq.fire())
+  private val do_deq = Wire(init=io.deq.fire())
+
+  when (do_deq) {
+    when (!do_enq) { valid := (valid >> 1) }
+    for (i <- 1 until entries)
+      when (valid(i)) { elts(i-1) := elts(i) }
+  }
+  when (do_enq && do_deq) {
+    for (i <- 0 until entries)
+      when (valid(i) && (if (i == entries-1) true.B else !valid(i+1))) { elts(i) := io.enq.bits }
+  }
+  when (do_enq && !do_deq) {
+    valid := (valid << 1) | UInt(1)
+    for (i <- 0 until entries)
+      when (!valid(i) && (if (i == 0) true.B else valid(i-1))) { elts(i) := io.enq.bits }
+  }
+
+  io.enq.ready := !valid(entries-1)
+  io.deq.valid := valid(0)
+  io.deq.bits := elts.head
+
+  if (flow) {
+    when (io.enq.valid) { io.deq.valid := true.B }
+    when (!valid(0)) {
+      io.deq.bits := io.enq.bits
+      do_deq := false.B
+      when (io.deq.ready) { do_enq := false.B }
+    }
+  }
+
+  if (pipe) {
+    when (io.deq.ready) { io.enq.ready := true.B }
+  }
+
+  io.count := PopCount(valid)
+  io.mask := valid
+}
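A minimal usage sketch, assuming the Chisel 2 compatibility layer used throughout this diff (the wrapper module and its 8-bit payload are hypothetical; the Frontend hunk above shows the real instantiation with a FrontendResp payload):

// Hypothetical smoke-test wrapper: a 3-entry flow-through ShiftQueue of bytes.
class ShiftQueueExample extends Module {
  val io = IO(new Bundle {
    val in  = Decoupled(UInt(width = 8)).flip
    val out = Decoupled(UInt(width = 8))
  })
  val q = Module(new ShiftQueue(UInt(width = 8), 3, flow = true))
  q.io.enq <> io.in    // backpressure propagates via q.io.enq.ready
  io.out <> q.io.deq   // flow = true: an enqueue to an empty queue is
                       // visible on deq in the same cycle
}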