1
0
Fork 0

Merge branch 'master' into async_queue_option

This commit is contained in:
Henry Cook 2017-04-25 11:23:22 -07:00 committed by GitHub
commit 9bb0d92381
13 changed files with 234 additions and 107 deletions

View File

@ -86,6 +86,15 @@ class BHT(nbht: Int)(implicit val p: Parameters) extends HasCoreParameters {
val history = Reg(UInt(width = nbhtbits))
}
object CFIType {
def SZ = 2
def apply() = UInt(width = SZ)
def branch = 0.U
def jump = 1.U
def call = 2.U
def ret = 3.U
}
// BTB update occurs during branch resolution (and only on a mispredict).
// - "pc" is what future fetch PCs will tag match against.
// - "br_pc" is the PC of the branch instruction.
@ -95,9 +104,8 @@ class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val target = UInt(width = vaddrBits)
val taken = Bool()
val isValid = Bool()
val isJump = Bool()
val isReturn = Bool()
val br_pc = UInt(width = vaddrBits)
val cfiType = CFIType()
}
// BHT update occurs during branch resolution on all conditional branches.
@ -110,8 +118,7 @@ class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) {
}
class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val isCall = Bool()
val isReturn = Bool()
val cfiType = CFIType()
val returnAddr = UInt(width = vaddrBits)
val prediction = Valid(new BTBResp)
}
@ -121,6 +128,7 @@ class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) {
// - "mask" provides a mask of valid instructions (instructions are
// masked off by the predicted taken branch from the BTB).
class BTBResp(implicit p: Parameters) extends BtbBundle()(p) {
val cfiType = CFIType()
val taken = Bool()
val mask = Bits(width = fetchWidth)
val bridx = Bits(width = log2Up(fetchWidth))
@ -154,8 +162,7 @@ class BTB(implicit p: Parameters) extends BtbModule {
val pageValid = Reg(init = UInt(0, nPages))
val isValid = Reg(init = UInt(0, entries))
val isReturn = Reg(UInt(width = entries))
val isJump = Reg(UInt(width = entries))
val cfiType = Reg(Vec(entries, CFIType()))
val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth))))
private def page(addr: UInt) = addr >> matchBits
@ -210,9 +217,8 @@ class BTB(implicit p: Parameters) extends BtbModule {
tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes))
idxPages(waddr) := idxPageUpdate +& 1 // the +1 corresponds to the <<1 on io.resp.valid
tgtPages(waddr) := tgtPageUpdate
cfiType(waddr) := r_btb_update.bits.cfiType
isValid := Mux(r_btb_update.bits.isValid, isValid | mask, isValid & ~mask)
isReturn := Mux(r_btb_update.bits.isReturn, isReturn | mask, isReturn & ~mask)
isJump := Mux(r_btb_update.bits.isJump, isJump | mask, isJump & ~mask)
if (fetchWidth > 1)
brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes)
@ -236,6 +242,7 @@ class BTB(implicit p: Parameters) extends BtbModule {
io.resp.bits.entry := OHToUInt(idxHit)
io.resp.bits.bridx := (if (fetchWidth > 1) Mux1H(idxHit, brIdx) else UInt(0))
io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1))
io.resp.bits.cfiType := Mux1H(idxHit, cfiType)
// if multiple entries for same PC land in BTB, zap them
when (PopCountAtLeast(idxHit, 2)) {
@ -244,7 +251,7 @@ class BTB(implicit p: Parameters) extends BtbModule {
if (nBHT > 0) {
val bht = new BHT(nBHT)
val isBranch = !(idxHit & isJump).orR
val isBranch = (idxHit & cfiType.map(_ === CFIType.branch).asUInt).orR
val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch)
val update_btb_hit = io.bht_update.bits.prediction.valid
when (io.bht_update.valid && update_btb_hit) {
@ -256,17 +263,14 @@ class BTB(implicit p: Parameters) extends BtbModule {
if (nRAS > 0) {
val ras = new RAS(nRAS)
val doPeek = (idxHit & isReturn).orR
val doPeek = (idxHit & cfiType.map(_ === CFIType.ret).asUInt).orR
when (!ras.isEmpty && doPeek) {
io.resp.bits.target := ras.peek
}
when (io.ras_update.valid) {
when (io.ras_update.bits.isCall) {
when (io.ras_update.bits.cfiType === CFIType.call) {
ras.push(io.ras_update.bits.returnAddr)
when (doPeek) {
io.resp.bits.target := io.ras_update.bits.returnAddr
}
}.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) {
}.elsewhen (io.ras_update.bits.cfiType === CFIType.ret && io.ras_update.bits.prediction.valid) {
ras.pop()
}
}

View File

@ -300,10 +300,10 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param
val pending_interrupts = read_mip & reg_mie
val m_interrupts = Mux(reg_mstatus.prv <= PRV.S || (reg_mstatus.prv === PRV.M && reg_mstatus.mie), pending_interrupts & ~reg_mideleg, UInt(0))
val s_interrupts = Mux(m_interrupts === 0 && (reg_mstatus.prv < PRV.S || (reg_mstatus.prv === PRV.S && reg_mstatus.sie)), pending_interrupts & reg_mideleg, UInt(0))
val all_interrupts = m_interrupts | s_interrupts
val (anyInterrupt, whichInterrupt) = chooseInterrupt(Seq(s_interrupts, m_interrupts))
val interruptMSB = BigInt(1) << (xLen-1)
val interruptCause = UInt(interruptMSB) + PriorityEncoder(all_interrupts)
io.interrupt := all_interrupts.orR && !reg_debug && !io.singleStep || reg_singleStepped
val interruptCause = UInt(interruptMSB) + whichInterrupt
io.interrupt := anyInterrupt && !reg_debug && !io.singleStep || reg_singleStepped
io.interrupt_cause := interruptCause
io.bp := reg_bp take nBreakpoints
io.pmp := reg_pmp.map(PMP(_))
@ -758,6 +758,14 @@ class CSRFile(perfEventSets: EventSets = new EventSets(Seq()))(implicit p: Param
}
}
def chooseInterrupt(masks: Seq[UInt]) = {
// we can't simply choose the highest-numbered interrupt, because timer
// interrupts are in the wrong place in mip.
val timerMask = UInt(0xF0, xLen)
val masked = masks.map(m => Cat(m.padTo(xLen) & ~timerMask, m.padTo(xLen) & timerMask))
(masks.map(_.orR).reduce(_||_), Log2(masked.asUInt)(log2Ceil(xLen)-1, 0))
}
def readModifyWriteCSR(cmd: UInt, rdata: UInt, wdata: UInt) =
(Mux(cmd.isOneOf(CSR.S, CSR.C), rdata, UInt(0)) | wdata) & ~Mux(cmd === CSR.C, wdata, UInt(0))

View File

@ -11,6 +11,7 @@ import uncore.tilelink2._
import uncore.util._
import util._
import TLMessages._
import scala.math.min
class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
val addr = Bits(width = untagBits)
@ -57,6 +58,19 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
data.io.req <> dataArb.io.out
dataArb.io.out.ready := true
val rational = p(coreplex.RocketCrossing) match {
case coreplex.RationalCrossing(_) => true
case _ => false
}
val tl_out_a = Wire(tl_out.a)
val q_depth = if (rational) min(2, maxUncachedInFlight-1) else 0
if (q_depth <= 0) {
tl_out.a <> tl_out_a
} else {
tl_out.a <> Queue(tl_out_a, q_depth, flow = true, pipe = true)
}
val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
val s1_probe = Reg(next=tl_out.b.fire(), init=Bool(false))
val probe_bits = RegEnable(tl_out.b.bits, tl_out.b.fire()) // TODO has data now :(
@ -176,7 +190,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val (s2_prb_ack_data, s2_report_param, probeNewCoh)= s2_probe_state.onProbe(probe_bits.param)
val (s2_victim_dirty, s2_shrink_param, voluntaryNewCoh) = s2_victim_state.onCacheControl(M_FLUSH)
val s2_update_meta = s2_hit_state =/= s2_new_hit_state
io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && tl_out.a.ready && !uncachedInFlight.asUInt.andR)
io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && tl_out_a.ready && !uncachedInFlight.asUInt.andR)
when (io.cpu.s2_nack || (s2_valid_hit && s2_update_meta)) { s1_nack := true }
val s3_valid = Reg(next = s2_valid, init=Bool(false))
@ -285,17 +299,17 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
M_XA_MAXU -> edge.Arithmetic(a_source, access_address, a_size, a_data, TLAtomics.MAXU)._2))
} else {
// If no managers support atomics, assert fail if processor asks for them
assert (!(tl_out.a.valid && pstore1_amo && s2_write && s2_uncached))
assert (!(tl_out_a.valid && pstore1_amo && s2_write && s2_uncached))
Wire(new TLBundleA(edge.bundle))
}
tl_out.a.valid := (s2_valid_cached_miss && !s2_victim_dirty) ||
tl_out_a.valid := (s2_valid_cached_miss && !s2_victim_dirty) ||
(s2_valid_uncached && !uncachedInFlight.asUInt.andR)
tl_out.a.bits := Mux(!s2_uncached, acquire, Mux(!s2_write, get, Mux(!pstore1_amo, put, atomics)))
tl_out_a.bits := Mux(!s2_uncached, acquire, Mux(!s2_write, get, Mux(!pstore1_amo, put, atomics)))
// Set pending bits for outstanding TileLink transaction
val a_sel = UIntToOH(a_source, maxUncachedInFlight+mmioOffset) >> mmioOffset
when (tl_out.a.fire()) {
when (tl_out_a.fire()) {
when (s2_uncached) {
(a_sel.toBools zip (uncachedInFlight zip uncachedReqs)) foreach { case (s, (f, r)) =>
when (s) {
@ -518,7 +532,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val flushed = Reg(init=Bool(true))
val flushing = Reg(init=Bool(false))
val flushCounter = Counter(nSets * nWays)
when (tl_out.a.fire() && !s2_uncached) { flushed := false }
when (tl_out_a.fire() && !s2_uncached) { flushed := false }
when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) {
io.cpu.s2_nack := !flushed
when (!flushed) {
@ -542,6 +556,6 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
}
// performance events
io.cpu.acquire := edge.done(tl_out.a)
io.cpu.acquire := edge.done(tl_out_a)
io.cpu.release := edge.done(tl_out.c)
}

View File

@ -5,6 +5,7 @@ package rocket
import Chisel._
import Chisel.ImplicitConversions._
import chisel3.core.withReset
import config._
import coreplex._
import diplomacy._
@ -64,11 +65,12 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
val icache = outer.icache.module
val tlb = Module(new TLB(log2Ceil(coreInstBytes*fetchWidth), nTLBEntries))
val fq = withReset(reset || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 3, flow = true)) }
val s0_valid = io.cpu.req.valid || fq.io.enq.ready
val s1_pc_ = Reg(UInt(width=vaddrBitsExtended))
val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline)
val s1_speculative = Reg(Bool())
val s1_same_block = Reg(Bool())
val s2_valid = Reg(init=Bool(true))
val s2_pc = Reg(init=io.resetVector)
val s2_btb_resp_valid = Reg(init=Bool(false))
@ -82,39 +84,33 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
val s2_speculative = Reg(init=Bool(false))
val s2_cacheable = Reg(init=Bool(false))
val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth)
val ntpc_same_block = (ntpc & rowBytes) === (s1_pc & rowBytes)
val fetchBytes = coreInstBytes * fetchWidth
val s1_base_pc = ~(~s1_pc | (fetchBytes - 1))
val ntpc = s1_base_pc + fetchBytes.U
val predicted_npc = Wire(init = ntpc)
val predicted_taken = Wire(init = Bool(false))
val icmiss = s2_valid && !icache.io.resp.valid
val npc = Mux(icmiss, s2_pc, predicted_npc)
val s0_same_block = !predicted_taken && !icmiss && !io.cpu.req.valid && ntpc_same_block
val stall = io.cpu.resp.valid && !io.cpu.resp.ready
when (!stall) {
s1_same_block := s0_same_block && !tlb.io.resp.miss
s1_pc_ := io.cpu.npc
// consider RVC fetches across blocks to be non-speculative if the first
// part was non-speculative
val s0_speculative =
if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken
else Bool(true)
s1_speculative := Mux(icmiss, s2_speculative, s0_speculative)
s2_valid := !icmiss
when (!icmiss) {
s2_pc := s1_pc
s2_speculative := s1_speculative
s2_cacheable := tlb.io.resp.cacheable
s2_maybe_pf := tlb.io.resp.pf.inst
s2_maybe_ae := tlb.io.resp.ae.inst
s2_tlb_miss := tlb.io.resp.miss
}
}
when (io.cpu.req.valid) {
s1_same_block := Bool(false)
s1_pc_ := io.cpu.npc
s1_speculative := io.cpu.req.bits.speculative
s2_valid := Bool(false)
val s2_replay = Wire(Bool())
s2_replay := (s2_valid && !fq.io.enq.fire()) || RegNext(s2_replay && !s0_valid)
val npc = Mux(s2_replay, s2_pc, predicted_npc)
s1_pc_ := io.cpu.npc
// consider RVC fetches across blocks to be non-speculative if the first
// part was non-speculative
val s0_speculative =
if (usingCompressed) s1_speculative || s2_valid && !s2_speculative || predicted_taken
else Bool(true)
s1_speculative := Mux(io.cpu.req.valid, io.cpu.req.bits.speculative, Mux(s2_replay, s2_speculative, s0_speculative))
s2_valid := false
when (!s2_replay && !io.cpu.req.valid) {
s2_valid := true
s2_pc := s1_pc
s2_speculative := s1_speculative
s2_cacheable := tlb.io.resp.cacheable
s2_maybe_pf := tlb.io.resp.pf.inst
s2_maybe_ae := tlb.io.resp.ae.inst
s2_tlb_miss := tlb.io.resp.miss
}
if (usingBTB) {
@ -124,7 +120,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
btb.io.btb_update := io.cpu.btb_update
btb.io.bht_update := io.cpu.bht_update
btb.io.ras_update := io.cpu.ras_update
when (!stall && !icmiss) {
when (!s2_replay) {
btb.io.req.valid := true
s2_btb_resp_valid := btb.io.resp.valid
s2_btb_resp_bits := btb.io.resp.bits
@ -133,10 +129,18 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended)
predicted_taken := Bool(true)
}
// push RAS speculatively
btb.io.ras_update.valid := btb.io.req.valid && btb.io.resp.valid && btb.io.resp.bits.cfiType.isOneOf(CFIType.call, CFIType.ret)
val returnAddrLSBs = btb.io.resp.bits.bridx +& 1
btb.io.ras_update.bits.returnAddr :=
Mux(returnAddrLSBs(log2Ceil(fetchWidth)), ntpc, s1_base_pc | ((returnAddrLSBs << log2Ceil(coreInstBytes)) & (fetchBytes - 1)))
btb.io.ras_update.bits.cfiType := btb.io.resp.bits.cfiType
btb.io.ras_update.bits.prediction.valid := true
}
io.ptw <> tlb.io.ptw
tlb.io.req.valid := !stall && !icmiss
tlb.io.req.valid := !s2_replay
tlb.io.req.bits.vaddr := s1_pc
tlb.io.req.bits.passthrough := Bool(false)
tlb.io.req.bits.instruction := Bool(true)
@ -144,27 +148,26 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
tlb.io.req.bits.sfence := io.cpu.sfence
tlb.io.req.bits.size := log2Ceil(coreInstBytes*fetchWidth)
icache.io.req.valid := !stall && !s0_same_block
icache.io.req.valid := s0_valid
icache.io.req.bits.addr := io.cpu.npc
icache.io.invalidate := io.cpu.flush_icache
icache.io.s1_paddr := tlb.io.resp.paddr
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || s1_speculative && !tlb.io.resp.cacheable || tlb.io.resp.pf.inst || tlb.io.resp.ae.inst
icache.io.s2_kill := false
icache.io.resp.ready := !stall && !s1_same_block
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || s2_replay
icache.io.s2_kill := s2_speculative && !s2_cacheable || s2_xcpt
val s2_kill = s2_speculative && !s2_cacheable || s2_xcpt
io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || s2_kill)
io.cpu.resp.bits.pc := s2_pc
fq.io.enq.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill)
fq.io.enq.bits.pc := s2_pc
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
require(fetchWidth * coreInstBytes <= rowBytes && isPow2(fetchWidth))
io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc.extract(log2Ceil(rowBytes)-1,log2Ceil(fetchWidth*coreInstBytes)) << log2Ceil(fetchWidth*coreInstBits))
io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
io.cpu.resp.bits.pf := s2_pf
io.cpu.resp.bits.ae := s2_ae
io.cpu.resp.bits.replay := s2_kill && !icache.io.resp.valid && !s2_xcpt
io.cpu.resp.bits.btb.valid := s2_btb_resp_valid
io.cpu.resp.bits.btb.bits := s2_btb_resp_bits
fq.io.enq.bits.data := icache.io.resp.bits
fq.io.enq.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
fq.io.enq.bits.pf := s2_pf
fq.io.enq.bits.ae := s2_ae
fq.io.enq.bits.replay := icache.io.s2_kill && !icache.io.resp.valid && !s2_xcpt
fq.io.enq.bits.btb.valid := s2_btb_resp_valid
fq.io.enq.bits.btb.bits := s2_btb_resp_bits
io.cpu.resp <> fq.io.deq
// performance events
io.cpu.acquire := edge.done(icache.io.mem(0).a)

View File

@ -84,7 +84,7 @@ class IBuf(implicit p: Parameters) extends CoreModule {
val ae = valid & (Mux(buf.ae, bufMask, UInt(0)) | Mux(io.imem.bits.ae, ~bufMask, UInt(0)))
val ic_replay = valid & (Mux(buf.replay, bufMask, UInt(0)) | Mux(io.imem.bits.replay, ~bufMask, UInt(0)))
val ibufBTBHitMask = Mux(ibufBTBHit, UIntToOH(ibufBTBResp.bridx), UInt(0))
assert(!io.imem.bits.btb.valid || io.imem.bits.btb.bits.bridx >= pcWordBits)
assert(!io.imem.valid || !io.imem.bits.btb.valid || io.imem.bits.btb.bits.bridx >= pcWordBits)
val icBTBHitMask = Mux(io.imem.bits.btb.valid, UIntToOH(io.imem.bits.btb.bits.bridx +& nBufValid - pcWordBits), UInt(0))
val btbHitMask = ibufBTBHitMask & bufMask | icBTBHitMask & ~bufMask

View File

@ -47,7 +47,7 @@ class ICacheBundle(outer: ICache) extends CoreBundle()(outer.p) {
val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req
val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission
val resp = Decoupled(new ICacheResp)
val resp = Valid(UInt(width = coreInstBits * fetchWidth))
val invalidate = Bool(INPUT)
val mem = outer.node.bundleOut
}
@ -65,7 +65,6 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
val s_ready :: s_request :: s_refill :: Nil = Enum(UInt(), 3)
val state = Reg(init=s_ready)
val invalidated = Reg(Bool())
val stall = !io.resp.ready
val refill_addr = Reg(UInt(width = paddrBits))
val s1_tag_hit = Wire(Vec(nWays, Bool()))
@ -78,10 +77,10 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
val s1_hit = out_valid && s1_any_tag_hit
val s1_miss = s1_valid && state === s_ready && !s1_any_tag_hit
val s0_valid = io.req.valid && state === s_ready && !(s1_valid && stall)
val s0_valid = io.req.valid && state === s_ready
val s0_vaddr = io.req.bits.addr
s1_valid := s0_valid || out_valid && stall
s1_valid := s0_valid
when (s1_miss) { refill_addr := io.s1_paddr }
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
@ -110,16 +109,34 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
}
val s1_tag_disparity = Wire(Vec(nWays, Bool()))
val s1_dout = Wire(Vec(nWays, UInt(width = code.width(rowBits))))
val wordBits = coreInstBits * fetchWidth
val s1_dout = Wire(Vec(nWays, UInt(width = code.width(wordBits))))
val s1_dout_valid = RegNext(s0_valid)
for (i <- 0 until nWays) {
val s1_vb = vb_array(Cat(UInt(i), io.s1_paddr(untagBits-1,blockOffBits))).toBool
s1_tag_disparity(i) := (code.decode(tag_rdata(i)).error holdUnless s1_dout_valid)
s1_tag_hit(i) := s1_vb && ((code.decode(tag_rdata(i)).uncorrected === s1_tag) holdUnless s1_dout_valid)
s1_tag_disparity(i) := code.decode(tag_rdata(i)).error
s1_tag_hit(i) := s1_vb && code.decode(tag_rdata(i)).uncorrected === s1_tag
}
val data_arrays = Seq.fill(nWays) { SeqMem(nSets * refillCycles, Bits(width = code.width(rowBits))) }
require(rowBits % wordBits == 0)
val data_arrays = Seq.fill(rowBits / wordBits) { SeqMem(nSets * refillCycles, Vec(nWays, UInt(width = code.width(wordBits)))) }
for ((data_array, i) <- data_arrays zipWithIndex) {
val wen = tl_out.d.valid
when (wen) {
val idx = (refill_idx << log2Ceil(refillCycles)) | refill_cnt
val data = tl_out.d.bits.data(wordBits*(i+1)-1, wordBits*i)
data_array.write(idx, Vec.fill(nWays)(code.encode(data)), (0 until nWays).map(repl_way === _))
}
def wordMatch(addr: UInt) = addr.extract(log2Ceil(rowBytes)-1, log2Ceil(wordBits/8)) === i
val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
val dout = data_array.read(s0_raddr, !wen && (s0_valid && wordMatch(s0_vaddr)))
when (wordMatch(io.s1_paddr)) {
s1_dout := dout
}
}
/*
for ((data_array, i) <- data_arrays zipWithIndex) {
val wen = tl_out.d.valid && repl_way === UInt(i)
when (wen) {
@ -127,28 +144,29 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
data_array.write((refill_idx << log2Ceil(refillCycles)) | refill_cnt, e_d)
}
val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles))
s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid) holdUnless s1_dout_valid
s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid)
}
*/
// output signals
outer.latency match {
case 1 =>
require(code.width(rowBits) == rowBits) // no ECC
io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout)
io.resp.bits := Mux1H(s1_tag_hit, s1_dout)
io.resp.valid := s1_hit
case 2 =>
val s2_valid = RegEnable(out_valid, Bool(false), !stall)
val s2_hit = RegEnable(s1_hit, Bool(false), !stall)
val s2_tag_hit = RegEnable(s1_tag_hit, !stall)
val s2_dout = RegEnable(s1_dout, !stall)
val s2_valid = RegNext(out_valid, Bool(false))
val s2_hit = RegNext(s1_hit, Bool(false))
val s2_tag_hit = RegEnable(s1_tag_hit, s1_valid)
val s2_dout = RegEnable(s1_dout, s1_valid)
val s2_way_mux = Mux1H(s2_tag_hit, s2_dout)
val s2_tag_disparity = RegEnable(s1_tag_disparity, !stall).asUInt.orR
val s2_tag_disparity = RegEnable(s1_tag_disparity, s1_valid).asUInt.orR
val s2_data_disparity = code.decode(s2_way_mux).error
val s2_disparity = s2_tag_disparity || s2_data_disparity
when (s2_valid && s2_disparity) { invalidate := true }
io.resp.bits.datablock := code.decode(s2_way_mux).uncorrected
io.resp.bits := code.decode(s2_way_mux).uncorrected
io.resp.valid := s2_hit && !s2_disparity
}
tl_out.a.valid := state === s_request && !io.s2_kill

View File

@ -173,7 +173,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val ibuf = Module(new IBuf)
val id_expanded_inst = ibuf.io.inst.map(_.bits.inst)
val id_inst = id_expanded_inst.map(_.bits)
ibuf.io.imem <> (if (usingCompressed) withReset(reset || take_pc) { Queue(io.imem.resp, 1, flow = true) } else io.imem.resp)
ibuf.io.imem <> io.imem.resp
ibuf.io.kill := take_pc
require(decodeWidth == 1 /* TODO */ && retireWidth == decodeWidth)
@ -520,7 +520,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
val sboard = new Scoreboard(32, true)
sboard.clear(ll_wen, ll_waddr)
val id_sboard_hazard = checkHazards(hazard_targets, sboard.read _)
val id_sboard_hazard = checkHazards(hazard_targets, rd => sboard.read(rd) && !(ll_wen && ll_waddr === rd))
sboard.set(wb_set_sboard && wb_wen, wb_waddr)
// stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage.
@ -587,8 +587,11 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
io.imem.btb_update.valid := (mem_reg_replay && mem_reg_btb_hit) || (mem_reg_valid && !take_pc_wb && (((mem_cfi_taken || !mem_cfi) && mem_wrong_npc) || (Bool(fastJAL) && mem_ctrl.jal && !mem_reg_btb_hit)))
io.imem.btb_update.bits.isValid := !mem_reg_replay && mem_cfi
io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr
io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00?01")
io.imem.btb_update.bits.cfiType :=
Mux((mem_ctrl.jal || mem_ctrl.jalr) && mem_waddr(0), CFIType.call,
Mux(mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00?01"), CFIType.ret,
Mux(mem_ctrl.jal || mem_ctrl.jalr, CFIType.jump,
CFIType.branch)))
io.imem.btb_update.bits.target := io.imem.req.bits.pc
io.imem.btb_update.bits.br_pc := (if (usingCompressed) mem_reg_pc + Mux(mem_reg_rvc, UInt(0), UInt(2)) else mem_reg_pc)
io.imem.btb_update.bits.pc := ~(~io.imem.btb_update.bits.br_pc | (coreInstBytes*fetchWidth-1))
@ -601,12 +604,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p)
io.imem.bht_update.bits.mispredict := mem_wrong_npc
io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction
io.imem.ras_update.valid := mem_reg_valid && !take_pc_wb
io.imem.ras_update.bits.returnAddr := mem_int_wdata
io.imem.ras_update.bits.isCall := io.imem.btb_update.bits.isJump && mem_waddr(0)
io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn
io.imem.ras_update.bits.prediction := io.imem.btb_update.bits.prediction
io.fpu.valid := !ctrl_killd && id_ctrl.fp
io.fpu.killx := ctrl_killx
io.fpu.killm := killm_common

View File

@ -4,7 +4,7 @@
package rocketchip
import Chisel._
import scala.collection.mutable.{LinkedHashSet, ArrayBuffer}
import scala.collection.mutable.LinkedHashSet
abstract class RocketTestSuite {
val dir: String
@ -56,9 +56,9 @@ class RegressionTestSuite(val names: LinkedHashSet[String]) extends RocketTestSu
}
object TestGeneration {
private val suites = ArrayBuffer[RocketTestSuite]()
private val suites = collection.mutable.ListMap[String, RocketTestSuite]()
def addSuite(s: RocketTestSuite) { suites += s }
def addSuite(s: RocketTestSuite) { suites += (s.makeTargetName -> s) }
def addSuites(s: Seq[RocketTestSuite]) { s.foreach(addSuite) }
@ -93,7 +93,7 @@ run-$kind-tests-fast: $$(addprefix $$(output_dir)/, $$(addsuffix .run, $targets)
} else { "\n" }
}
suites.groupBy(_.kind).map { case (kind, s) => gen(kind, s) }.mkString("\n")
suites.values.toSeq.groupBy(_.kind).map { case (kind, s) => gen(kind, s) }.mkString("\n")
}
}

View File

@ -450,6 +450,7 @@ class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
l2s.io.signedIn := ~in.bits.typ(0)
l2s.io.in := intValue
l2s.io.roundingMode := in.bits.rm
l2s.io.detectTininess := hardfloat.consts.tininess_afterRounding
mux.data := sanitizeNaN(l2s.io.out, FType.S)
mux.exc := l2s.io.exceptionFlags
@ -460,6 +461,7 @@ class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
l2d.io.signedIn := ~in.bits.typ(0)
l2d.io.in := intValue
l2d.io.roundingMode := in.bits.rm
l2d.io.detectTininess := hardfloat.consts.tininess_afterRounding
mux.data := Cat(l2d.io.out >> l2s.io.out.getWidth, l2s.io.out)
when (!in.bits.singleIn) {
mux.data := sanitizeNaN(l2d.io.out, FType.D)
@ -511,11 +513,13 @@ class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
val d2s = Module(new hardfloat.RecFNToRecFN(dExpWidth, dSigWidth, sExpWidth, sSigWidth))
d2s.io.in := in.bits.in1
d2s.io.roundingMode := in.bits.rm
d2s.io.detectTininess := hardfloat.consts.tininess_afterRounding
val d2sOut = sanitizeNaN(d2s.io.out, FType.S)
val s2d = Module(new hardfloat.RecFNToRecFN(sExpWidth, sSigWidth, dExpWidth, dSigWidth))
s2d.io.in := maxType.unsafeConvert(in.bits.in1, FType.S)
s2d.io.roundingMode := in.bits.rm
s2d.io.detectTininess := hardfloat.consts.tininess_afterRounding
val s2dOut = sanitizeNaN(s2d.io.out, FType.D)
when (in.bits.singleOut) {
@ -554,6 +558,7 @@ class FPUFMAPipe(val latency: Int, t: FType)(implicit p: Parameters) extends FPU
val fma = Module(new hardfloat.MulAddRecFN(t.exp, t.sig))
fma.io.op := in.fmaCmd
fma.io.roundingMode := in.rm
fma.io.detectTininess := hardfloat.consts.tininess_afterRounding
fma.io.a := in.in1
fma.io.b := in.in2
fma.io.c := in.in3
@ -775,6 +780,7 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
divSqrt.io.a := fpiu.io.out.bits.in.in1
divSqrt.io.b := fpiu.io.out.bits.in.in2
divSqrt.io.roundingMode := fpiu.io.out.bits.in.rm
divSqrt.io.detectTininess := hardfloat.consts.tininess_afterRounding
when (divSqrt.io.inValid && divSqrt_inReady) {
divSqrt_in_flight := true
@ -794,6 +800,7 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
divSqrt_toSingle.io.in := divSqrt_wdata_double
divSqrt_toSingle.io.roundingMode := divSqrt_rm
divSqrt_toSingle.io.detectTininess := hardfloat.consts.tininess_afterRounding
divSqrt_wdata := Mux(divSqrt_single, Cat(divSqrt_wdata_double >> divSqrt_toSingle.io.out.getWidth, sanitizeNaN(divSqrt_toSingle.io.out, FType.S)), divSqrt_wdata_double)
divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0))
} else {

View File

@ -30,7 +30,8 @@ trait HasExternalInterrupts extends HasTileParameters {
// debug, msip, mtip, meip, seip, lip offsets in CSRs
def csrIntMap: List[Int] = {
val nlips = tileParams.core.nLocalInterrupts
List(65535, 3, 7, 11, 9) ++ List.tabulate(nlips)(_ + 16)
val seip = if (usingVM) Seq(9) else Nil
List(65535, 3, 7, 11) ++ seip ++ List.tabulate(nlips)(_ + 16)
}
}

View File

@ -132,7 +132,13 @@ class TLWidthWidget(innerBeatBytes: Int)(implicit p: Parameters) extends LazyMod
} else if (edgeIn.manager.beatBytes > edgeOut.manager.beatBytes) {
// split input to output
val repeat = Wire(Bool())
repeat := split(edgeIn, Repeater(in, repeat), edgeOut, out)
val repeated = Repeater(in, repeat)
val cated = Wire(repeated)
cated <> repeated
edgeIn.data(cated.bits) := Cat(
edgeIn.data(repeated.bits)(edgeIn.manager.beatBytes*8-1, edgeOut.manager.beatBytes*8),
edgeIn.data(in.bits)(edgeOut.manager.beatBytes*8-1, 0))
repeat := split(edgeIn, cated, edgeOut, out)
} else {
// merge input to output
merge(edgeIn, in, edgeOut, out)

View File

@ -38,9 +38,17 @@ package object util {
implicit def wcToUInt(c: WideCounter): UInt = c.value
implicit class UIntToAugmentedUInt(val x: UInt) extends AnyVal {
def sextTo(n: Int): UInt =
def sextTo(n: Int): UInt = {
require(x.getWidth <= n)
if (x.getWidth == n) x
else Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x)
}
def padTo(n: Int): UInt = {
require(x.getWidth <= n)
if (x.getWidth == n) x
else Cat(UInt(0, n - x.getWidth), x)
}
def extract(hi: Int, lo: Int): UInt = {
if (hi == lo-1) UInt(0)

View File

@ -0,0 +1,61 @@
// See LICENSE.SiFive for license details.
package util
import Chisel._
/** Implements the same interface as chisel3.util.Queue, but uses a shift
* register internally. It is less energy efficient whenever the queue
* has more than one entry populated, but is faster on the dequeue side.
* It is efficient for usually-empty flow-through queues. */
class ShiftQueue[T <: Data](gen: T,
val entries: Int,
pipe: Boolean = false,
flow: Boolean = false)
extends Module {
val io = IO(new QueueIO(gen, entries) {
val mask = UInt(OUTPUT, entries)
})
private val ram = Mem(entries, gen)
private val valid = RegInit(UInt(0, entries))
private val elts = Reg(Vec(entries, gen))
private val do_enq = Wire(init=io.enq.fire())
private val do_deq = Wire(init=io.deq.fire())
when (do_deq) {
when (!do_enq) { valid := (valid >> 1) }
for (i <- 1 until entries)
when (valid(i)) { elts(i-1) := elts(i) }
}
when (do_enq && do_deq) {
for (i <- 0 until entries)
when (valid(i) && (if (i == entries-1) true.B else !valid(i+1))) { elts(i) := io.enq.bits }
}
when (do_enq && !do_deq) {
valid := (valid << 1) | UInt(1)
for (i <- 0 until entries)
when (!valid(i) && (if (i == 0) true.B else valid(i-1))) { elts(i) := io.enq.bits }
}
io.enq.ready := !valid(entries-1)
io.deq.valid := valid(0)
io.deq.bits := elts.head
if (flow) {
when (io.enq.valid) { io.deq.valid := true.B }
when (!valid(0)) {
io.deq.bits := io.enq.bits
do_deq := false.B
when (io.deq.ready) { do_enq := false.B }
}
}
if (pipe) {
when (io.deq.ready) { io.enq.ready := true.B }
}
io.count := PopCount(valid)
io.mask := valid
}