Perform some control-flow transfers within the Frontend
This commit is contained in:
@ -81,21 +81,24 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
val icache = outer.icache.module
|
||||
require(fetchWidth*coreInstBytes == outer.icacheParams.fetchBytes)
|
||||
|
||||
val tlb = Module(new TLB(log2Ceil(coreInstBytes*fetchWidth), nTLBEntries))
|
||||
val fq = withReset(reset || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 4, flow = true)) }
|
||||
val fetchBytes = coreInstBytes * fetchWidth
|
||||
val tlb = Module(new TLB(log2Ceil(fetchBytes), nTLBEntries))
|
||||
val fq = withReset(reset || io.cpu.req.valid) { Module(new ShiftQueue(new FrontendResp, 5, flow = true)) }
|
||||
|
||||
val s0_valid = io.cpu.req.valid || !fq.io.mask(fq.io.mask.getWidth-2)
|
||||
val s0_valid = io.cpu.req.valid || !fq.io.mask(fq.io.mask.getWidth-3)
|
||||
val s1_pc = Reg(UInt(width=vaddrBitsExtended))
|
||||
val s1_speculative = Reg(Bool())
|
||||
val s2_valid = RegInit(false.B)
|
||||
val s2_pc = Reg(init=io.resetVector)
|
||||
val s2_btb_resp_valid = Reg(init=Bool(false))
|
||||
val s2_pc = RegInit(alignPC(io.resetVector))
|
||||
val s2_btb_resp_valid = if (usingBTB) Reg(Bool()) else false.B
|
||||
val s2_btb_resp_bits = Reg(new BTBResp)
|
||||
val s2_tlb_resp = Reg(tlb.io.resp)
|
||||
val s2_xcpt = !s2_tlb_resp.miss && fq.io.enq.bits.xcpt.asUInt.orR
|
||||
val s2_speculative = Reg(init=Bool(false))
|
||||
val s2_partial_insn_valid = RegInit(false.B)
|
||||
val s2_partial_insn = Reg(UInt(width = coreInstBits))
|
||||
val s2_wrong_path = Reg(Bool())
|
||||
|
||||
val fetchBytes = coreInstBytes * fetchWidth
|
||||
val s1_base_pc = ~(~s1_pc | (fetchBytes - 1))
|
||||
val ntpc = s1_base_pc + fetchBytes.U
|
||||
val predicted_npc = Wire(init = ntpc)
|
||||
@ -113,40 +116,15 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
else Bool(true)
|
||||
s1_speculative := Mux(io.cpu.req.valid, io.cpu.req.bits.speculative, Mux(s2_replay, s2_speculative, s0_speculative))
|
||||
|
||||
val s2_redirect = Wire(init = io.cpu.req.valid)
|
||||
s2_valid := false
|
||||
when (!s2_replay && !io.cpu.req.valid) {
|
||||
when (!s2_replay && !s2_redirect) {
|
||||
s2_valid := true
|
||||
s2_pc := s1_pc
|
||||
s2_speculative := s1_speculative
|
||||
s2_tlb_resp := tlb.io.resp
|
||||
}
|
||||
|
||||
if (usingBTB) {
|
||||
val btb = Module(new BTB)
|
||||
btb.io.req.valid := false
|
||||
btb.io.req.bits.addr := s1_pc
|
||||
btb.io.btb_update := io.cpu.btb_update
|
||||
btb.io.bht_update := io.cpu.bht_update
|
||||
btb.io.ras_update := io.cpu.ras_update
|
||||
when (!s2_replay) {
|
||||
btb.io.req.valid := true
|
||||
s2_btb_resp_valid := btb.io.resp.valid
|
||||
s2_btb_resp_bits := btb.io.resp.bits
|
||||
}
|
||||
when (btb.io.resp.valid && btb.io.resp.bits.taken) {
|
||||
predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended)
|
||||
predicted_taken := Bool(true)
|
||||
}
|
||||
|
||||
// push RAS speculatively
|
||||
btb.io.ras_update.valid := btb.io.req.valid && btb.io.resp.valid && btb.io.resp.bits.cfiType.isOneOf(CFIType.call, CFIType.ret)
|
||||
val returnAddrLSBs = btb.io.resp.bits.bridx +& 1
|
||||
btb.io.ras_update.bits.returnAddr :=
|
||||
Mux(returnAddrLSBs(log2Ceil(fetchWidth)), ntpc, s1_base_pc | ((returnAddrLSBs << log2Ceil(coreInstBytes)) & (fetchBytes - 1)))
|
||||
btb.io.ras_update.bits.cfiType := btb.io.resp.bits.cfiType
|
||||
btb.io.ras_update.bits.prediction.valid := true
|
||||
}
|
||||
|
||||
io.ptw <> tlb.io.ptw
|
||||
tlb.io.req.valid := !s2_replay
|
||||
tlb.io.req.bits.vaddr := s1_pc
|
||||
@ -161,12 +139,12 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
icache.io.invalidate := io.cpu.flush_icache
|
||||
icache.io.s1_paddr := tlb.io.resp.paddr
|
||||
icache.io.s2_vaddr := s2_pc
|
||||
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || s2_replay
|
||||
icache.io.s1_kill := s2_redirect || tlb.io.resp.miss || s2_replay
|
||||
icache.io.s2_kill := s2_valid && (s2_speculative && !s2_tlb_resp.cacheable || s2_xcpt)
|
||||
|
||||
fq.io.enq.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill)
|
||||
fq.io.enq.bits.pc := s2_pc
|
||||
io.cpu.npc := ~(~Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) | (coreInstBytes-1)) // discard LSB(s)
|
||||
io.cpu.npc := alignPC(Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc))
|
||||
|
||||
fq.io.enq.bits.data := icache.io.resp.bits.data
|
||||
fq.io.enq.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Ceil(fetchWidth)+log2Ceil(coreInstBytes)-1, log2Ceil(coreInstBytes))
|
||||
@ -176,11 +154,126 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
||||
fq.io.enq.bits.xcpt := s2_tlb_resp
|
||||
when (icache.io.resp.valid && icache.io.resp.bits.ae) { fq.io.enq.bits.xcpt.ae.inst := true }
|
||||
|
||||
if (usingBTB) {
|
||||
val btb = Module(new BTB)
|
||||
btb.io.req.valid := false
|
||||
btb.io.req.bits.addr := s1_pc
|
||||
btb.io.btb_update := io.cpu.btb_update
|
||||
btb.io.bht_update := io.cpu.bht_update
|
||||
btb.io.ras_update.valid := false
|
||||
btb.io.bht_advance.valid := false
|
||||
when (!s2_replay) {
|
||||
btb.io.req.valid := !s2_redirect
|
||||
s2_btb_resp_valid := btb.io.resp.valid
|
||||
s2_btb_resp_bits := btb.io.resp.bits
|
||||
}
|
||||
when (btb.io.resp.valid && btb.io.resp.bits.taken) {
|
||||
predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended)
|
||||
predicted_taken := Bool(true)
|
||||
}
|
||||
|
||||
if (!coreParams.jumpInFrontend) {
|
||||
// push RAS speculatively
|
||||
btb.io.ras_update.valid := btb.io.req.valid && btb.io.resp.valid && btb.io.resp.bits.cfiType.isOneOf(CFIType.call, CFIType.ret)
|
||||
val returnAddrLSBs = btb.io.resp.bits.bridx +& 1
|
||||
btb.io.ras_update.bits.returnAddr :=
|
||||
Mux(returnAddrLSBs(log2Ceil(fetchWidth)), ntpc, s1_base_pc | ((returnAddrLSBs << log2Ceil(coreInstBytes)) & (fetchBytes - 1)))
|
||||
btb.io.ras_update.bits.cfiType := btb.io.resp.bits.cfiType
|
||||
btb.io.ras_update.bits.prediction.valid := true
|
||||
} else when (fq.io.enq.fire()) {
|
||||
val s2_btb_hit = s2_btb_resp_valid && s2_btb_resp_bits.taken
|
||||
val s2_base_pc = ~(~s2_pc | (fetchBytes-1))
|
||||
val taken_idx = Wire(UInt())
|
||||
val after_idx = Wire(UInt())
|
||||
|
||||
def scanInsns(idx: Int, prevValid: Bool, prevBits: UInt, prevTaken: Bool): Bool = {
|
||||
val prevRVI = prevValid && prevBits(1,0) === 3
|
||||
val valid = fq.io.enq.bits.mask(idx) && !prevRVI
|
||||
val bits = fq.io.enq.bits.data(coreInstBits*(idx+1)-1, coreInstBits*idx)
|
||||
val rvc = bits(1,0) =/= 3
|
||||
val rviBits = Cat(bits, prevBits)
|
||||
val rviBranch = rviBits(6,0) === Instructions.BEQ.value.asUInt()(6,0)
|
||||
val rviJump = rviBits(6,0) === Instructions.JAL.value.asUInt()(6,0)
|
||||
val rviJALR = rviBits(6,0) === Instructions.JALR.value.asUInt()(6,0)
|
||||
val rviReturn = rviJALR && !rviBits(7) && BitPat("b00?01") === rviBits(19,15)
|
||||
val rviCall = (rviJALR || rviJump) && rviBits(7)
|
||||
val rvcBranch = bits === Instructions.C_BEQZ || bits === Instructions.C_BNEZ
|
||||
val rvcJAL = Bool(xLen == 32) && bits === Instructions.C_JAL
|
||||
val rvcJump = bits === Instructions.C_J || rvcJAL
|
||||
val rvcImm = Mux(bits(14), new RVCDecoder(bits).bImm.asSInt, 0.S) | Mux(bits(14,13) === 1, new RVCDecoder(bits).jImm.asSInt, 0.S)
|
||||
val rvcJR = bits === Instructions.C_MV && bits(6,2) === 0
|
||||
val rvcReturn = rvcJR && BitPat("b00?01") === bits(11,7)
|
||||
val rvcJALR = bits === Instructions.C_ADD && bits(6,2) === 0
|
||||
val rvcCall = rvcJAL || rvcJALR
|
||||
val rviImm = Mux(rviBits(3), ImmGen(IMM_UJ, rviBits), 0.S) | Mux(!rviBits(2), ImmGen(IMM_SB, rviBits), 0.S)
|
||||
val taken =
|
||||
prevRVI && (rviJump || rviJALR || rviBranch && s2_btb_resp_bits.bht.taken) ||
|
||||
valid && (rvcJump || rvcJALR || rvcJR || rvcBranch && s2_btb_resp_bits.bht.taken)
|
||||
|
||||
when (!prevTaken) {
|
||||
taken_idx := idx
|
||||
after_idx := idx + 1
|
||||
btb.io.ras_update.valid := !s2_wrong_path && (prevRVI && (rviCall || rviReturn) || valid && (rvcCall || rvcReturn))
|
||||
btb.io.ras_update.bits.prediction.valid := true
|
||||
btb.io.ras_update.bits.cfiType := Mux(Mux(prevRVI, rviReturn, rvcReturn), CFIType.ret, CFIType.call)
|
||||
|
||||
when (!s2_btb_hit) {
|
||||
when (prevRVI && (rviJALR && !(rviReturn && btb.io.ras_head.valid)) ||
|
||||
valid && (rvcJALR || (rvcJR && !btb.io.ras_head.valid))) {
|
||||
s2_wrong_path := true
|
||||
}
|
||||
when (taken) {
|
||||
val pc = s2_base_pc | (idx*coreInstBytes)
|
||||
val npc =
|
||||
if (idx == 0) pc.asSInt + Mux(prevRVI, rviImm -& 2.S, rvcImm)
|
||||
else Mux(prevRVI, pc - coreInstBytes, pc).asSInt + Mux(prevRVI, rviImm, rvcImm)
|
||||
predicted_npc := Mux(prevRVI && rviReturn || valid && rvcReturn, btb.io.ras_head.bits, npc.asUInt)
|
||||
}
|
||||
|
||||
when (prevRVI && rviBranch || valid && rvcBranch) {
|
||||
btb.io.bht_advance.valid := !s2_wrong_path && !s2_btb_resp_valid
|
||||
btb.io.bht_advance.bits := s2_btb_resp_bits
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (idx == fetchWidth-1) {
|
||||
s2_partial_insn_valid := false
|
||||
when (valid && !prevTaken && !rvc) {
|
||||
s2_partial_insn_valid := true
|
||||
s2_partial_insn := bits | 0x3
|
||||
}
|
||||
prevTaken || taken
|
||||
} else {
|
||||
scanInsns(idx + 1, valid, bits, prevTaken || taken)
|
||||
}
|
||||
}
|
||||
|
||||
btb.io.ras_update.bits.returnAddr := s2_base_pc + (after_idx << log2Ceil(coreInstBytes))
|
||||
|
||||
val taken = scanInsns(0, s2_partial_insn_valid, s2_partial_insn, false.B)
|
||||
when (s2_btb_hit) {
|
||||
s2_partial_insn_valid := false
|
||||
}.otherwise {
|
||||
fq.io.enq.bits.btb.bits.bridx := taken_idx
|
||||
when (taken) {
|
||||
fq.io.enq.bits.btb.valid := true
|
||||
fq.io.enq.bits.btb.bits.taken := true
|
||||
s2_redirect := true
|
||||
}
|
||||
}
|
||||
}
|
||||
when (s2_redirect) { s2_partial_insn_valid := false }
|
||||
when (io.cpu.req.valid) { s2_wrong_path := false }
|
||||
}
|
||||
|
||||
io.cpu.resp <> fq.io.deq
|
||||
|
||||
// performance events
|
||||
io.cpu.perf.acquire := edge.done(icache.io.tl_out(0).a)
|
||||
io.cpu.perf.tlbMiss := io.ptw.req.fire()
|
||||
|
||||
def alignPC(pc: UInt) = ~(~pc | (coreInstBytes - 1))
|
||||
}
|
||||
|
||||
/** Mix-ins for constructing tiles that have an ICache-based pipeline frontend */
|
||||
|
Reference in New Issue
Block a user