Merge pull request #769 from freechipsproject/new-div-sqrt
Incorporate new div/sqrt unit
This commit is contained in:
commit
efce8f06b8
@ -1 +1 @@
|
|||||||
Subproject commit dd098501bd6acf074fcd0bb109312adc4d83f9f9
|
Subproject commit 6909906e7e46e9abec601669a92a3af567531d5e
|
@ -81,14 +81,13 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
|||||||
val s2_pc = Reg(init=io.resetVector)
|
val s2_pc = Reg(init=io.resetVector)
|
||||||
val s2_btb_resp_valid = Reg(init=Bool(false))
|
val s2_btb_resp_valid = Reg(init=Bool(false))
|
||||||
val s2_btb_resp_bits = Reg(new BTBResp)
|
val s2_btb_resp_bits = Reg(new BTBResp)
|
||||||
val s2_maybe_pf = Reg(init=Bool(false))
|
val s2_maybe_pf = Reg(Bool())
|
||||||
val s2_maybe_ae = Reg(init=Bool(false))
|
val s2_maybe_ae = Reg(Bool())
|
||||||
val s2_tlb_miss = Reg(Bool())
|
val s2_tlb_miss = Reg(Bool())
|
||||||
val s2_pf = s2_maybe_pf && !s2_tlb_miss
|
val s2_pf = s2_maybe_pf && !s2_tlb_miss
|
||||||
val s2_ae = s2_maybe_ae && !s2_tlb_miss
|
val s2_ae = s2_maybe_ae && !s2_tlb_miss
|
||||||
val s2_xcpt = s2_pf || s2_ae
|
val s2_xcpt = s2_pf || s2_ae
|
||||||
val s2_speculative = Reg(init=Bool(false))
|
val s2_speculative = Reg(init=Bool(false))
|
||||||
val s2_cacheable = Reg(init=Bool(false))
|
|
||||||
|
|
||||||
val fetchBytes = coreInstBytes * fetchWidth
|
val fetchBytes = coreInstBytes * fetchWidth
|
||||||
val s1_base_pc = ~(~s1_pc | (fetchBytes - 1))
|
val s1_base_pc = ~(~s1_pc | (fetchBytes - 1))
|
||||||
@ -113,7 +112,6 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
|||||||
s2_valid := true
|
s2_valid := true
|
||||||
s2_pc := s1_pc
|
s2_pc := s1_pc
|
||||||
s2_speculative := s1_speculative
|
s2_speculative := s1_speculative
|
||||||
s2_cacheable := tlb.io.resp.cacheable
|
|
||||||
s2_maybe_pf := tlb.io.resp.pf.inst
|
s2_maybe_pf := tlb.io.resp.pf.inst
|
||||||
s2_maybe_ae := tlb.io.resp.ae.inst
|
s2_maybe_ae := tlb.io.resp.ae.inst
|
||||||
s2_tlb_miss := tlb.io.resp.miss
|
s2_tlb_miss := tlb.io.resp.miss
|
||||||
@ -161,7 +159,7 @@ class FrontendModule(outer: Frontend) extends LazyModuleImp(outer)
|
|||||||
icache.io.s1_paddr := tlb.io.resp.paddr
|
icache.io.s1_paddr := tlb.io.resp.paddr
|
||||||
icache.io.s2_vaddr := s2_pc
|
icache.io.s2_vaddr := s2_pc
|
||||||
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || s2_replay
|
icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || s2_replay
|
||||||
icache.io.s2_kill := RegNext(RegNext(s0_valid)) && s2_speculative && !s2_cacheable || s2_xcpt
|
icache.io.s2_kill := s2_valid && (s2_speculative || s2_xcpt)
|
||||||
|
|
||||||
fq.io.enq.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill)
|
fq.io.enq.valid := s2_valid && (icache.io.resp.valid || icache.io.s2_kill)
|
||||||
fq.io.enq.bits.pc := s2_pc
|
fq.io.enq.bits.pc := s2_pc
|
||||||
|
@ -108,27 +108,24 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
|||||||
val s2_slaveValid = RegNext(s1_slaveValid, false.B)
|
val s2_slaveValid = RegNext(s1_slaveValid, false.B)
|
||||||
val s3_slaveValid = RegNext(false.B)
|
val s3_slaveValid = RegNext(false.B)
|
||||||
|
|
||||||
val s_ready :: s_request :: s_refill :: Nil = Enum(UInt(), 3)
|
val s1_valid = Reg(init=Bool(false))
|
||||||
val state = Reg(init=s_ready)
|
val s1_tag_hit = Wire(Vec(nWays, Bool()))
|
||||||
val invalidated = Reg(Bool())
|
val s1_hit = s1_tag_hit.reduce(_||_) || Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))
|
||||||
|
val s2_valid = RegNext(s1_valid && !io.s1_kill, Bool(false))
|
||||||
|
val s2_hit = RegNext(s1_hit)
|
||||||
|
|
||||||
val refill_addr = Reg(UInt(width = paddrBits))
|
val invalidated = Reg(Bool())
|
||||||
|
val refill_valid = RegInit(false.B)
|
||||||
|
val s2_miss = s2_valid && !s2_hit && !io.s2_kill && !RegNext(refill_valid)
|
||||||
|
val refill_addr = RegEnable(io.s1_paddr, s1_valid && !(refill_valid || s2_miss))
|
||||||
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
|
val refill_tag = refill_addr(tagBits+untagBits-1,untagBits)
|
||||||
val refill_idx = refill_addr(untagBits-1,blockOffBits)
|
val refill_idx = refill_addr(untagBits-1,blockOffBits)
|
||||||
val s1_tag_hit = Wire(Vec(nWays, Bool()))
|
|
||||||
val s1_any_tag_hit = s1_tag_hit.reduce(_||_) || Mux(s1_slaveValid, true.B, addrMaybeInScratchpad(io.s1_paddr))
|
|
||||||
|
|
||||||
val s1_valid = Reg(init=Bool(false))
|
|
||||||
val s1_hit = s1_valid && s1_any_tag_hit
|
|
||||||
val s1_miss = s1_valid && state === s_ready && !s1_any_tag_hit
|
|
||||||
|
|
||||||
io.req.ready := !(tl_out.d.fire() || s0_slaveValid || s3_slaveValid)
|
io.req.ready := !(tl_out.d.fire() || s0_slaveValid || s3_slaveValid)
|
||||||
val s0_valid = io.req.fire()
|
val s0_valid = io.req.fire()
|
||||||
val s0_vaddr = io.req.bits.addr
|
val s0_vaddr = io.req.bits.addr
|
||||||
|
|
||||||
s1_valid := s0_valid
|
s1_valid := s0_valid
|
||||||
|
|
||||||
when (s1_miss) { refill_addr := io.s1_paddr }
|
|
||||||
val (_, _, refill_done, refill_cnt) = edge_out.count(tl_out.d)
|
val (_, _, refill_done, refill_cnt) = edge_out.count(tl_out.d)
|
||||||
tl_out.d.ready := !s3_slaveValid
|
tl_out.d.ready := !s3_slaveValid
|
||||||
require (edge_out.manager.minLatency > 0)
|
require (edge_out.manager.minLatency > 0)
|
||||||
@ -213,11 +210,9 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
|||||||
require(dECC.isInstanceOf[uncore.util.IdentityCode])
|
require(dECC.isInstanceOf[uncore.util.IdentityCode])
|
||||||
require(outer.icacheParams.itimAddr.isEmpty)
|
require(outer.icacheParams.itimAddr.isEmpty)
|
||||||
io.resp.bits := Mux1H(s1_tag_hit, s1_dout)
|
io.resp.bits := Mux1H(s1_tag_hit, s1_dout)
|
||||||
io.resp.valid := s1_hit
|
io.resp.valid := s1_valid && s1_hit
|
||||||
|
|
||||||
case 2 =>
|
case 2 =>
|
||||||
val s2_valid = RegNext(s1_valid && !io.s1_kill, Bool(false))
|
|
||||||
val s2_hit = RegNext(s1_hit)
|
|
||||||
val s2_tag_hit = RegEnable(s1_tag_hit, s1_valid || s1_slaveValid)
|
val s2_tag_hit = RegEnable(s1_tag_hit, s1_valid || s1_slaveValid)
|
||||||
val s2_dout = RegEnable(s1_dout, s1_valid || s1_slaveValid)
|
val s2_dout = RegEnable(s1_dout, s1_valid || s1_slaveValid)
|
||||||
val s2_way_mux = Mux1H(s2_tag_hit, s2_dout)
|
val s2_way_mux = Mux1H(s2_tag_hit, s2_dout)
|
||||||
@ -279,7 +274,7 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
|||||||
tl.e.ready := true
|
tl.e.ready := true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tl_out.a.valid := state === s_request && !io.s2_kill
|
tl_out.a.valid := s2_miss && !refill_valid
|
||||||
tl_out.a.bits := edge_out.Get(
|
tl_out.a.bits := edge_out.Get(
|
||||||
fromSource = UInt(0),
|
fromSource = UInt(0),
|
||||||
toAddress = (refill_addr >> blockOffBits) << blockOffBits,
|
toAddress = (refill_addr >> blockOffBits) << blockOffBits,
|
||||||
@ -289,19 +284,7 @@ class ICacheModule(outer: ICache) extends LazyModuleImp(outer)
|
|||||||
tl_out.e.valid := Bool(false)
|
tl_out.e.valid := Bool(false)
|
||||||
assert(!(tl_out.a.valid && addrMaybeInScratchpad(tl_out.a.bits.address)))
|
assert(!(tl_out.a.valid && addrMaybeInScratchpad(tl_out.a.bits.address)))
|
||||||
|
|
||||||
// control state machine
|
when (!refill_valid) { invalidated := false.B }
|
||||||
switch (state) {
|
when (tl_out.a.fire()) { refill_valid := true.B }
|
||||||
is (s_ready) {
|
when (refill_done) { refill_valid := false.B}
|
||||||
when (s1_miss && !io.s1_kill) { state := s_request }
|
|
||||||
invalidated := Bool(false)
|
|
||||||
}
|
|
||||||
is (s_request) {
|
|
||||||
when (tl_out.a.ready) { state := s_refill }
|
|
||||||
when (io.s2_kill) { state := s_ready }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
when (refill_done) {
|
|
||||||
assert(state === s_refill)
|
|
||||||
state := s_ready
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -82,7 +82,7 @@ class FPUDecoder(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
FNMADD_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y),
|
FNMADD_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y),
|
||||||
FNMSUB_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y),
|
FNMSUB_S -> List(N,Y,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y),
|
||||||
FDIV_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,N,N,Y,N,Y),
|
FDIV_S -> List(N,Y,Y,Y,N,N,N,Y,Y,N,N,N,N,Y,N,Y),
|
||||||
FSQRT_S -> List(N,Y,Y,N,N,Y,X,Y,Y,N,N,N,N,N,Y,Y))
|
FSQRT_S -> List(N,Y,Y,N,N,N,X,Y,Y,N,N,N,N,N,Y,Y))
|
||||||
val d =
|
val d =
|
||||||
Array(FLD -> List(Y,Y,N,N,N,X,X,X,N,N,N,N,N,N,N,N),
|
Array(FLD -> List(Y,Y,N,N,N,X,X,X,N,N,N,N,N,N,N,N),
|
||||||
FSD -> List(Y,N,N,Y,N,Y,X,N,N,N,Y,N,N,N,N,N),
|
FSD -> List(Y,N,N,Y,N,Y,X,N,N,N,Y,N,N,N,N,N),
|
||||||
@ -115,7 +115,7 @@ class FPUDecoder(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
FNMADD_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y),
|
FNMADD_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y),
|
||||||
FNMSUB_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y),
|
FNMSUB_D -> List(N,Y,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y),
|
||||||
FDIV_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,N,N,Y,N,Y),
|
FDIV_D -> List(N,Y,Y,Y,N,N,N,N,N,N,N,N,N,Y,N,Y),
|
||||||
FSQRT_D -> List(N,Y,Y,N,N,Y,X,N,N,N,N,N,N,N,Y,Y))
|
FSQRT_D -> List(N,Y,Y,N,N,N,X,N,N,N,N,N,N,N,Y,Y))
|
||||||
|
|
||||||
val insns = fLen match {
|
val insns = fLen match {
|
||||||
case 32 => f
|
case 32 => f
|
||||||
@ -177,31 +177,6 @@ class FPInput(implicit p: Parameters) extends CoreBundle()(p) with HasFPUCtrlSig
|
|||||||
override def cloneType = new FPInput().asInstanceOf[this.type]
|
override def cloneType = new FPInput().asInstanceOf[this.type]
|
||||||
}
|
}
|
||||||
|
|
||||||
object ClassifyRecFN {
|
|
||||||
def apply(expWidth: Int, sigWidth: Int, in: UInt) = {
|
|
||||||
val sign = in(sigWidth + expWidth)
|
|
||||||
val exp = in(sigWidth + expWidth - 1, sigWidth - 1)
|
|
||||||
val sig = in(sigWidth - 2, 0)
|
|
||||||
|
|
||||||
val code = exp(expWidth,expWidth-2)
|
|
||||||
val codeHi = code(2, 1)
|
|
||||||
val isSpecial = codeHi === UInt(3)
|
|
||||||
|
|
||||||
val isHighSubnormalIn = exp(expWidth-2, 0) < UInt(2)
|
|
||||||
val isSubnormal = code === UInt(1) || codeHi === UInt(1) && isHighSubnormalIn
|
|
||||||
val isNormal = codeHi === UInt(1) && !isHighSubnormalIn || codeHi === UInt(2)
|
|
||||||
val isZero = code === UInt(0)
|
|
||||||
val isInf = isSpecial && !exp(expWidth-2)
|
|
||||||
val isNaN = code.andR
|
|
||||||
val isSNaN = isNaN && !sig(sigWidth-2)
|
|
||||||
val isQNaN = isNaN && sig(sigWidth-2)
|
|
||||||
|
|
||||||
Cat(isQNaN, isSNaN, isInf && !sign, isNormal && !sign,
|
|
||||||
isSubnormal && !sign, isZero && !sign, isZero && sign,
|
|
||||||
isSubnormal && sign, isNormal && sign, isInf && sign)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
case class FType(exp: Int, sig: Int) {
|
case class FType(exp: Int, sig: Int) {
|
||||||
def ieeeWidth = exp + sig
|
def ieeeWidth = exp + sig
|
||||||
def recodedWidth = ieeeWidth + 1
|
def recodedWidth = ieeeWidth + 1
|
||||||
@ -210,6 +185,26 @@ case class FType(exp: Int, sig: Int) {
|
|||||||
def isNaN(x: UInt) = x(sig + exp - 1, sig + exp - 3).andR
|
def isNaN(x: UInt) = x(sig + exp - 1, sig + exp - 3).andR
|
||||||
def isSNaN(x: UInt) = isNaN(x) && !x(sig - 2)
|
def isSNaN(x: UInt) = isNaN(x) && !x(sig - 2)
|
||||||
|
|
||||||
|
def classify(x: UInt) = {
|
||||||
|
val sign = x(sig + exp)
|
||||||
|
val code = x(exp + sig - 1, exp + sig - 3)
|
||||||
|
val codeHi = code(2, 1)
|
||||||
|
val isSpecial = codeHi === UInt(3)
|
||||||
|
|
||||||
|
val isHighSubnormalIn = x(exp + sig - 3, sig - 1) < UInt(2)
|
||||||
|
val isSubnormal = code === UInt(1) || codeHi === UInt(1) && isHighSubnormalIn
|
||||||
|
val isNormal = codeHi === UInt(1) && !isHighSubnormalIn || codeHi === UInt(2)
|
||||||
|
val isZero = code === UInt(0)
|
||||||
|
val isInf = isSpecial && !code(0)
|
||||||
|
val isNaN = code.andR
|
||||||
|
val isSNaN = isNaN && !x(sig-2)
|
||||||
|
val isQNaN = isNaN && x(sig-2)
|
||||||
|
|
||||||
|
Cat(isQNaN, isSNaN, isInf && !sign, isNormal && !sign,
|
||||||
|
isSubnormal && !sign, isZero && !sign, isZero && sign,
|
||||||
|
isSubnormal && sign, isNormal && sign, isInf && sign)
|
||||||
|
}
|
||||||
|
|
||||||
// convert between formats, ignoring rounding, range, NaN
|
// convert between formats, ignoring rounding, range, NaN
|
||||||
def unsafeConvert(x: UInt, to: FType) = if (this == to) x else {
|
def unsafeConvert(x: UInt, to: FType) = if (this == to) x else {
|
||||||
val sign = x(sig + exp)
|
val sign = x(sig + exp)
|
||||||
@ -237,13 +232,16 @@ object FType {
|
|||||||
|
|
||||||
trait HasFPUParameters {
|
trait HasFPUParameters {
|
||||||
val fLen: Int
|
val fLen: Int
|
||||||
val (sExpWidth, sSigWidth) = (FType.S.exp, FType.S.sig)
|
val xLen: Int
|
||||||
val (dExpWidth, dSigWidth) = (FType.D.exp, FType.D.sig)
|
val minXLen = 32
|
||||||
|
val nIntTypes = log2Ceil(xLen/minXLen) + 1
|
||||||
val floatTypes = FType.all.filter(_.ieeeWidth <= fLen)
|
val floatTypes = FType.all.filter(_.ieeeWidth <= fLen)
|
||||||
|
val minType = floatTypes.head
|
||||||
val maxType = floatTypes.last
|
val maxType = floatTypes.last
|
||||||
def prevType(t: FType) = floatTypes(floatTypes.indexOf(t) - 1)
|
def prevType(t: FType) = floatTypes(typeTag(t) - 1)
|
||||||
val maxExpWidth = maxType.exp
|
val maxExpWidth = maxType.exp
|
||||||
val maxSigWidth = maxType.sig
|
val maxSigWidth = maxType.sig
|
||||||
|
def typeTag(t: FType) = floatTypes.indexOf(t)
|
||||||
|
|
||||||
private def isBox(x: UInt, t: FType): Bool = x(t.sig + t.exp, t.sig + t.exp - 4).andR
|
private def isBox(x: UInt, t: FType): Bool = x(t.sig + t.exp, t.sig + t.exp - 4).andR
|
||||||
|
|
||||||
@ -261,10 +259,11 @@ trait HasFPUParameters {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// implement NaN unboxing for FU inputs
|
// implement NaN unboxing for FU inputs
|
||||||
def unbox(x: UInt, tag: UInt): UInt = {
|
def unbox(x: UInt, tag: UInt, exactType: Option[FType]): UInt = {
|
||||||
|
val outType = exactType.getOrElse(maxType)
|
||||||
def helper(x: UInt, t: FType): Seq[(Bool, UInt)] = {
|
def helper(x: UInt, t: FType): Seq[(Bool, UInt)] = {
|
||||||
val prev =
|
val prev =
|
||||||
if (floatTypes.indexOf(t) == 0) {
|
if (t == minType) {
|
||||||
Seq()
|
Seq()
|
||||||
} else {
|
} else {
|
||||||
val prevT = prevType(t)
|
val prevT = prevType(t)
|
||||||
@ -276,18 +275,21 @@ trait HasFPUParameters {
|
|||||||
val isbox = isBox(x, t)
|
val isbox = isBox(x, t)
|
||||||
prev.map(p => (isbox && p._1, p._2))
|
prev.map(p => (isbox && p._1, p._2))
|
||||||
}
|
}
|
||||||
prev :+ (true.B, t.unsafeConvert(x, maxType))
|
prev :+ (true.B, t.unsafeConvert(x, outType))
|
||||||
}
|
}
|
||||||
|
|
||||||
val res = helper(x, maxType)
|
val (oks, floats) = helper(x, maxType).unzip
|
||||||
val oks = res.map(_._1)
|
if (exactType.isEmpty || floatTypes.size == 1) {
|
||||||
val floats = res.map(_._2)
|
|
||||||
Mux(oks(tag), floats(tag), maxType.qNaN)
|
Mux(oks(tag), floats(tag), maxType.qNaN)
|
||||||
|
} else {
|
||||||
|
val t = exactType.get
|
||||||
|
floats(typeTag(t)) | Mux(oks(typeTag(t)), 0.U, t.qNaN)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// make sure that the redundant bits in the NaN-boxed encoding are consistent
|
// make sure that the redundant bits in the NaN-boxed encoding are consistent
|
||||||
def consistent(x: UInt): Bool = {
|
def consistent(x: UInt): Bool = {
|
||||||
def helper(x: UInt, t: FType): Bool = if (floatTypes.indexOf(t) == 0) true.B else {
|
def helper(x: UInt, t: FType): Bool = if (typeTag(t) == 0) true.B else {
|
||||||
val prevT = prevType(t)
|
val prevT = prevType(t)
|
||||||
val unswizzled = Cat(
|
val unswizzled = Cat(
|
||||||
x(prevT.sig + prevT.exp - 1),
|
x(prevT.sig + prevT.exp - 1),
|
||||||
@ -306,7 +308,7 @@ trait HasFPUParameters {
|
|||||||
if (yt == maxType) {
|
if (yt == maxType) {
|
||||||
y
|
y
|
||||||
} else {
|
} else {
|
||||||
val nt = floatTypes(floatTypes.indexOf(yt) + 1)
|
val nt = floatTypes(typeTag(yt) + 1)
|
||||||
val bigger = box(UInt((BigInt(1) << nt.recodedWidth)-1), nt, y, yt)
|
val bigger = box(UInt((BigInt(1) << nt.recodedWidth)-1), nt, y, yt)
|
||||||
bigger | UInt((BigInt(1) << maxType.recodedWidth) - (BigInt(1) << nt.recodedWidth))
|
bigger | UInt((BigInt(1) << maxType.recodedWidth) - (BigInt(1) << nt.recodedWidth))
|
||||||
}
|
}
|
||||||
@ -317,7 +319,7 @@ trait HasFPUParameters {
|
|||||||
|
|
||||||
// zap bits that hardfloat thinks are don't-cares, but we do care about
|
// zap bits that hardfloat thinks are don't-cares, but we do care about
|
||||||
def sanitizeNaN(x: UInt, t: FType): UInt = {
|
def sanitizeNaN(x: UInt, t: FType): UInt = {
|
||||||
if (floatTypes.indexOf(t) == 0) {
|
if (typeTag(t) == 0) {
|
||||||
x
|
x
|
||||||
} else {
|
} else {
|
||||||
val maskedNaN = x & ~UInt((BigInt(1) << (t.sig-1)) | (BigInt(1) << (t.sig+t.exp-4)), t.recodedWidth)
|
val maskedNaN = x & ~UInt((BigInt(1) << (t.sig-1)) | (BigInt(1) << (t.sig+t.exp-4)), t.recodedWidth)
|
||||||
@ -328,7 +330,7 @@ trait HasFPUParameters {
|
|||||||
// implement NaN boxing and recoding for FL*/fmv.*.x
|
// implement NaN boxing and recoding for FL*/fmv.*.x
|
||||||
def recode(x: UInt, tag: UInt): UInt = {
|
def recode(x: UInt, tag: UInt): UInt = {
|
||||||
def helper(x: UInt, t: FType): UInt = {
|
def helper(x: UInt, t: FType): UInt = {
|
||||||
if (floatTypes.indexOf(t) == 0) {
|
if (typeTag(t) == 0) {
|
||||||
t.recode(x)
|
t.recode(x)
|
||||||
} else {
|
} else {
|
||||||
val prevT = prevType(t)
|
val prevT = prevType(t)
|
||||||
@ -343,7 +345,7 @@ trait HasFPUParameters {
|
|||||||
|
|
||||||
// implement NaN unboxing and un-recoding for FS*/fmv.x.*
|
// implement NaN unboxing and un-recoding for FS*/fmv.x.*
|
||||||
def ieee(x: UInt, t: FType = maxType): UInt = {
|
def ieee(x: UInt, t: FType = maxType): UInt = {
|
||||||
if (floatTypes.indexOf(t) == 0) {
|
if (typeTag(t) == 0) {
|
||||||
t.ieee(x)
|
t.ieee(x)
|
||||||
} else {
|
} else {
|
||||||
val unrecoded = t.ieee(x)
|
val unrecoded = t.ieee(x)
|
||||||
@ -377,39 +379,54 @@ class FPToInt(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
val in = RegEnable(io.in.bits, io.in.valid)
|
val in = RegEnable(io.in.bits, io.in.valid)
|
||||||
val valid = Reg(next=io.in.valid)
|
val valid = Reg(next=io.in.valid)
|
||||||
|
|
||||||
val classify_s = ClassifyRecFN(sExpWidth, sSigWidth, maxType.unsafeConvert(in.in1, FType.S))
|
|
||||||
val classify_out = fLen match {
|
|
||||||
case 32 => classify_s
|
|
||||||
case 64 =>
|
|
||||||
val classify_d = ClassifyRecFN(dExpWidth, dSigWidth, in.in1)
|
|
||||||
Mux(in.singleIn, classify_s, classify_d)
|
|
||||||
}
|
|
||||||
|
|
||||||
val dcmp = Module(new hardfloat.CompareRecFN(maxExpWidth, maxSigWidth))
|
val dcmp = Module(new hardfloat.CompareRecFN(maxExpWidth, maxSigWidth))
|
||||||
dcmp.io.a := in.in1
|
dcmp.io.a := in.in1
|
||||||
dcmp.io.b := in.in2
|
dcmp.io.b := in.in2
|
||||||
dcmp.io.signaling := !in.rm(1)
|
dcmp.io.signaling := !in.rm(1)
|
||||||
|
|
||||||
|
val tag = !in.singleOut // TODO typeTag
|
||||||
val store = ieee(in.in1)
|
val store = ieee(in.in1)
|
||||||
val toint = Mux(in.rm(0), classify_out, store)
|
val toint = Wire(init = store)
|
||||||
io.out.bits.store := Mux(in.singleOut, Fill(xLen/32, store(31, 0)), store)
|
val intType = Wire(init = tag)
|
||||||
io.out.bits.toint := Mux(in.singleOut, toint(31, 0).sextTo(xLen), toint)
|
io.out.bits.store := ((0 until nIntTypes).map(i => Fill(1 << (nIntTypes - i - 1), store((minXLen << i) - 1, 0))): Seq[UInt])(tag)
|
||||||
|
io.out.bits.toint := ((0 until nIntTypes).map(i => toint((minXLen << i) - 1, 0).sextTo(xLen)): Seq[UInt])(intType)
|
||||||
io.out.bits.exc := Bits(0)
|
io.out.bits.exc := Bits(0)
|
||||||
|
|
||||||
|
when (in.rm(0)) {
|
||||||
|
val classify_out = (floatTypes.map(t => t.classify(maxType.unsafeConvert(in.in1, t))): Seq[UInt])(tag)
|
||||||
|
toint := classify_out | (store >> minXLen << minXLen)
|
||||||
|
intType := 0
|
||||||
|
}
|
||||||
|
|
||||||
when (in.wflags) { // feq/flt/fle, fcvt
|
when (in.wflags) { // feq/flt/fle, fcvt
|
||||||
io.out.bits.toint := (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR
|
toint := (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR | (store >> minXLen << minXLen)
|
||||||
io.out.bits.exc := dcmp.io.exceptionFlags
|
io.out.bits.exc := dcmp.io.exceptionFlags
|
||||||
|
intType := 0
|
||||||
|
|
||||||
when (!in.ren2) { // fcvt
|
when (!in.ren2) { // fcvt
|
||||||
val minXLen = 32
|
val cvtType = in.typ.extract(log2Ceil(nIntTypes), 1)
|
||||||
val n = log2Ceil(xLen/minXLen) + 1
|
intType := cvtType
|
||||||
for (i <- 0 until n) {
|
|
||||||
val conv = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, minXLen << i))
|
val conv = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, xLen))
|
||||||
conv.io.in := in.in1
|
conv.io.in := in.in1
|
||||||
conv.io.roundingMode := in.rm
|
conv.io.roundingMode := in.rm
|
||||||
conv.io.signedOut := ~in.typ(0)
|
conv.io.signedOut := ~in.typ(0)
|
||||||
when (in.typ.extract(log2Ceil(n), 1) === i) {
|
toint := conv.io.out
|
||||||
io.out.bits.toint := conv.io.out.sextTo(xLen)
|
|
||||||
io.out.bits.exc := Cat(conv.io.intExceptionFlags(2, 1).orR, UInt(0, 3), conv.io.intExceptionFlags(0))
|
io.out.bits.exc := Cat(conv.io.intExceptionFlags(2, 1).orR, UInt(0, 3), conv.io.intExceptionFlags(0))
|
||||||
|
|
||||||
|
for (i <- 0 until nIntTypes-1) {
|
||||||
|
val w = minXLen << i
|
||||||
|
when (cvtType === i) {
|
||||||
|
val narrow = Module(new hardfloat.RecFNToIN(maxExpWidth, maxSigWidth, w))
|
||||||
|
narrow.io.in := in.in1
|
||||||
|
narrow.io.roundingMode := in.rm
|
||||||
|
narrow.io.signedOut := ~in.typ(0)
|
||||||
|
|
||||||
|
val excSign = in.in1(maxExpWidth + maxSigWidth) && !maxType.isNaN(in.in1)
|
||||||
|
val excOut = Cat(conv.io.signedOut === excSign, Fill(w-1, !excSign))
|
||||||
|
val invalid = conv.io.intExceptionFlags(2) || narrow.io.intExceptionFlags(1)
|
||||||
|
when (invalid) { toint := Cat(conv.io.out >> w, excOut) }
|
||||||
|
io.out.bits.exc := Cat(invalid, UInt(0, 3), !invalid && conv.io.intExceptionFlags(0))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -427,18 +444,17 @@ class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
val in = Pipe(io.in)
|
val in = Pipe(io.in)
|
||||||
|
val tag = !in.bits.singleIn // TODO typeTag
|
||||||
|
|
||||||
val mux = Wire(new FPResult)
|
val mux = Wire(new FPResult)
|
||||||
mux.exc := Bits(0)
|
mux.exc := Bits(0)
|
||||||
mux.data := recode(in.bits.in1, !in.bits.singleIn)
|
mux.data := recode(in.bits.in1, !in.bits.singleIn)
|
||||||
|
|
||||||
val intValue = {
|
val intValue = {
|
||||||
val minXLen = 32
|
|
||||||
val n = log2Ceil(xLen/minXLen) + 1
|
|
||||||
val res = Wire(init = in.bits.in1.asSInt)
|
val res = Wire(init = in.bits.in1.asSInt)
|
||||||
for (i <- 0 until n-1) {
|
for (i <- 0 until nIntTypes-1) {
|
||||||
val smallInt = in.bits.in1((minXLen << i) - 1, 0)
|
val smallInt = in.bits.in1((minXLen << i) - 1, 0)
|
||||||
when (in.bits.typ.extract(log2Ceil(n), 1) === i) {
|
when (in.bits.typ.extract(log2Ceil(nIntTypes), 1) === i) {
|
||||||
res := Mux(in.bits.typ(0), smallInt.zext, smallInt.asSInt)
|
res := Mux(in.bits.typ(0), smallInt.zext, smallInt.asSInt)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -446,28 +462,21 @@ class IntToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
when (in.bits.wflags) { // fcvt
|
when (in.bits.wflags) { // fcvt
|
||||||
val l2s = Module(new hardfloat.INToRecFN(xLen, sExpWidth, sSigWidth))
|
// could be improved for RVD/RVQ with a single variable-position rounding
|
||||||
l2s.io.signedIn := ~in.bits.typ(0)
|
// unit, rather than N fixed-position ones
|
||||||
l2s.io.in := intValue
|
val i2fResults = for (t <- floatTypes) yield {
|
||||||
l2s.io.roundingMode := in.bits.rm
|
val i2f = Module(new hardfloat.INToRecFN(xLen, t.exp, t.sig))
|
||||||
l2s.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
i2f.io.signedIn := ~in.bits.typ(0)
|
||||||
mux.data := sanitizeNaN(l2s.io.out, FType.S)
|
i2f.io.in := intValue
|
||||||
mux.exc := l2s.io.exceptionFlags
|
i2f.io.roundingMode := in.bits.rm
|
||||||
|
i2f.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
||||||
|
(sanitizeNaN(i2f.io.out, t), i2f.io.exceptionFlags)
|
||||||
|
}
|
||||||
|
|
||||||
fLen match {
|
val (data, exc) = i2fResults.unzip
|
||||||
case 32 =>
|
val dataPadded = data.init.map(d => Cat(data.last >> d.getWidth, d)) :+ data.last
|
||||||
case 64 =>
|
mux.data := dataPadded(tag)
|
||||||
val l2d = Module(new hardfloat.INToRecFN(xLen, dExpWidth, dSigWidth))
|
mux.exc := exc(tag)
|
||||||
l2d.io.signedIn := ~in.bits.typ(0)
|
|
||||||
l2d.io.in := intValue
|
|
||||||
l2d.io.roundingMode := in.bits.rm
|
|
||||||
l2d.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
|
||||||
mux.data := Cat(l2d.io.out >> l2s.io.out.getWidth, l2s.io.out)
|
|
||||||
when (!in.bits.singleIn) {
|
|
||||||
mux.data := sanitizeNaN(l2d.io.out, FType.D)
|
|
||||||
mux.exc := l2d.io.exceptionFlags
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
io.out <> Pipe(in.valid, mux, latency-1)
|
io.out <> Pipe(in.valid, mux, latency-1)
|
||||||
@ -499,35 +508,33 @@ class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
fsgnjMux.data := Mux(isNaNOut, maxType.qNaN, Mux(isLHS, in.bits.in1, in.bits.in2))
|
fsgnjMux.data := Mux(isNaNOut, maxType.qNaN, Mux(isLHS, in.bits.in1, in.bits.in2))
|
||||||
}
|
}
|
||||||
|
|
||||||
val mux = Wire(new FPResult)
|
val inTag = !in.bits.singleIn // TODO typeTag
|
||||||
mux.exc := fsgnjMux.exc
|
val outTag = !in.bits.singleOut // TODO typeTag
|
||||||
|
val mux = Wire(init = fsgnjMux)
|
||||||
fLen match {
|
for (t <- floatTypes.init) {
|
||||||
case 32 =>
|
when (outTag === typeTag(t)) {
|
||||||
mux.data := fsgnjMux.data
|
mux.data := Cat(fsgnjMux.data >> t.recodedWidth, maxType.unsafeConvert(fsgnjMux.data, t))
|
||||||
case 64 =>
|
}
|
||||||
val fsgnjSingle = maxType.unsafeConvert(fsgnjMux.data, FType.S)
|
}
|
||||||
mux.data := Mux(in.bits.singleOut, Cat(fsgnjMux.data >> fsgnjSingle.getWidth, fsgnjSingle), fsgnjMux.data)
|
|
||||||
|
|
||||||
when (in.bits.wflags && !in.bits.ren2) { // fcvt
|
when (in.bits.wflags && !in.bits.ren2) { // fcvt
|
||||||
val d2s = Module(new hardfloat.RecFNToRecFN(dExpWidth, dSigWidth, sExpWidth, sSigWidth))
|
if (floatTypes.size > 1) {
|
||||||
d2s.io.in := in.bits.in1
|
// widening conversions simply canonicalize NaN operands
|
||||||
d2s.io.roundingMode := in.bits.rm
|
val widened = Mux(maxType.isNaN(in.bits.in1), maxType.qNaN, in.bits.in1)
|
||||||
d2s.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
fsgnjMux.data := widened
|
||||||
val d2sOut = sanitizeNaN(d2s.io.out, FType.S)
|
fsgnjMux.exc := maxType.isSNaN(in.bits.in1) << 4
|
||||||
|
|
||||||
val s2d = Module(new hardfloat.RecFNToRecFN(sExpWidth, sSigWidth, dExpWidth, dSigWidth))
|
// narrowing conversions require rounding (for RVQ, this could be
|
||||||
s2d.io.in := maxType.unsafeConvert(in.bits.in1, FType.S)
|
// optimized to use a single variable-position rounding unit, rather
|
||||||
s2d.io.roundingMode := in.bits.rm
|
// than two fixed-position ones)
|
||||||
s2d.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
for (outType <- floatTypes.init) when (outTag === typeTag(outType) && (typeTag(outType) == 0 || outTag < inTag)) {
|
||||||
val s2dOut = sanitizeNaN(s2d.io.out, FType.D)
|
val narrower = Module(new hardfloat.RecFNToRecFN(maxType.exp, maxType.sig, outType.exp, outType.sig))
|
||||||
|
narrower.io.in := in.bits.in1
|
||||||
when (in.bits.singleOut) {
|
narrower.io.roundingMode := in.bits.rm
|
||||||
mux.data := Cat(s2dOut >> d2sOut.getWidth, d2sOut)
|
narrower.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
||||||
mux.exc := d2s.io.exceptionFlags
|
val narrowed = sanitizeNaN(narrower.io.out, outType)
|
||||||
}.otherwise {
|
mux.data := Cat(fsgnjMux.data >> narrowed.getWidth, narrowed)
|
||||||
mux.data := s2dOut
|
mux.exc := narrower.io.exceptionFlags
|
||||||
mux.exc := s2d.io.exceptionFlags
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -535,7 +542,7 @@ class FPToFP(val latency: Int)(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
io.out <> Pipe(in.valid, mux, latency-1)
|
io.out <> Pipe(in.valid, mux, latency-1)
|
||||||
}
|
}
|
||||||
|
|
||||||
class FPUFMAPipe(val latency: Int, t: FType)(implicit p: Parameters) extends FPUModule()(p) {
|
class FPUFMAPipe(val latency: Int, val t: FType)(implicit p: Parameters) extends FPUModule()(p) {
|
||||||
val io = new Bundle {
|
val io = new Bundle {
|
||||||
val in = Valid(new FPInput).flip
|
val in = Valid(new FPInput).flip
|
||||||
val out = Valid(new FPResult)
|
val out = Valid(new FPResult)
|
||||||
@ -550,9 +557,8 @@ class FPUFMAPipe(val latency: Int, t: FType)(implicit p: Parameters) extends FPU
|
|||||||
val cmd_fma = io.in.bits.ren3
|
val cmd_fma = io.in.bits.ren3
|
||||||
val cmd_addsub = io.in.bits.swap23
|
val cmd_addsub = io.in.bits.swap23
|
||||||
in := io.in.bits
|
in := io.in.bits
|
||||||
in.in1 := maxType.unsafeConvert(io.in.bits.in1, t)
|
when (cmd_addsub) { in.in2 := one }
|
||||||
in.in2 := Mux(cmd_addsub, one, maxType.unsafeConvert(io.in.bits.in2, t))
|
when (!(cmd_fma || cmd_addsub)) { in.in3 := zero }
|
||||||
in.in3 := Mux(cmd_fma || cmd_addsub, maxType.unsafeConvert(io.in.bits.in3, t), zero)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
val fma = Module(new hardfloat.MulAddRecFN(t.exp, t.sig))
|
val fma = Module(new hardfloat.MulAddRecFN(t.exp, t.sig))
|
||||||
@ -612,45 +618,29 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
printf("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + 32, load_wb_data)
|
printf("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + 32, load_wb_data)
|
||||||
}
|
}
|
||||||
|
|
||||||
val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt()))
|
val ex_ra = List.fill(3)(Reg(UInt()))
|
||||||
|
val ex_rs = ex_ra.map(a => regfile(a))
|
||||||
when (io.valid) {
|
when (io.valid) {
|
||||||
when (id_ctrl.ren1) {
|
when (id_ctrl.ren1) {
|
||||||
when (!id_ctrl.swap12) { ex_ra1 := io.inst(19,15) }
|
when (!id_ctrl.swap12) { ex_ra(0) := io.inst(19,15) }
|
||||||
when (id_ctrl.swap12) { ex_ra2 := io.inst(19,15) }
|
when (id_ctrl.swap12) { ex_ra(1) := io.inst(19,15) }
|
||||||
}
|
}
|
||||||
when (id_ctrl.ren2) {
|
when (id_ctrl.ren2) {
|
||||||
when (id_ctrl.swap12) { ex_ra1 := io.inst(24,20) }
|
when (id_ctrl.swap12) { ex_ra(0) := io.inst(24,20) }
|
||||||
when (id_ctrl.swap23) { ex_ra3 := io.inst(24,20) }
|
when (id_ctrl.swap23) { ex_ra(2) := io.inst(24,20) }
|
||||||
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.inst(24,20) }
|
when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra(1) := io.inst(24,20) }
|
||||||
}
|
}
|
||||||
when (id_ctrl.ren3) { ex_ra3 := io.inst(31,27) }
|
when (id_ctrl.ren3) { ex_ra(2) := io.inst(31,27) }
|
||||||
}
|
}
|
||||||
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
|
val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12))
|
||||||
|
|
||||||
val req = Wire(new FPInput)
|
|
||||||
def readAndUnbox(addr: UInt) = unbox(regfile(addr), !ex_ctrl.singleIn)
|
|
||||||
req := ex_ctrl
|
|
||||||
req.rm := ex_rm
|
|
||||||
req.in1 := readAndUnbox(ex_ra1)
|
|
||||||
req.in2 := readAndUnbox(ex_ra2)
|
|
||||||
req.in3 := readAndUnbox(ex_ra3)
|
|
||||||
req.typ := ex_reg_inst(21,20)
|
|
||||||
req.fmaCmd := ex_reg_inst(3,2) | (!ex_ctrl.ren3 && ex_reg_inst(27))
|
|
||||||
when (ex_cp_valid) {
|
|
||||||
req := io.cp_req.bits
|
|
||||||
when (io.cp_req.bits.swap23) {
|
|
||||||
req.in2 := io.cp_req.bits.in3
|
|
||||||
req.in3 := io.cp_req.bits.in2
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
val sfma = Module(new FPUFMAPipe(cfg.sfmaLatency, FType.S))
|
val sfma = Module(new FPUFMAPipe(cfg.sfmaLatency, FType.S))
|
||||||
sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.singleOut
|
sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.singleOut
|
||||||
sfma.io.in.bits := req
|
sfma.io.in.bits := fuInput(Some(sfma.t))
|
||||||
|
|
||||||
val fpiu = Module(new FPToInt)
|
val fpiu = Module(new FPToInt)
|
||||||
fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || (ex_ctrl.fastpipe && ex_ctrl.wflags))
|
fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || (ex_ctrl.fastpipe && ex_ctrl.wflags))
|
||||||
fpiu.io.in.bits := req
|
fpiu.io.in.bits := fuInput(None)
|
||||||
io.store_data := fpiu.io.out.bits.store
|
io.store_data := fpiu.io.out.bits.store
|
||||||
io.toint_data := fpiu.io.out.bits.toint
|
io.toint_data := fpiu.io.out.bits.toint
|
||||||
when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
|
when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){
|
||||||
@ -660,22 +650,20 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
|
|
||||||
val ifpu = Module(new IntToFP(2))
|
val ifpu = Module(new IntToFP(2))
|
||||||
ifpu.io.in.valid := req_valid && ex_ctrl.fromint
|
ifpu.io.in.valid := req_valid && ex_ctrl.fromint
|
||||||
ifpu.io.in.bits := req
|
ifpu.io.in.bits := fpiu.io.in.bits
|
||||||
ifpu.io.in.bits.in1 := Mux(ex_cp_valid, io.cp_req.bits.in1, io.fromint_data)
|
ifpu.io.in.bits.in1 := Mux(ex_cp_valid, io.cp_req.bits.in1, io.fromint_data)
|
||||||
|
|
||||||
val fpmu = Module(new FPToFP(2))
|
val fpmu = Module(new FPToFP(2))
|
||||||
fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
|
fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe
|
||||||
fpmu.io.in.bits := req
|
fpmu.io.in.bits := fpiu.io.in.bits
|
||||||
fpmu.io.lt := fpiu.io.out.bits.lt
|
fpmu.io.lt := fpiu.io.out.bits.lt
|
||||||
|
|
||||||
val divSqrt_wen = Reg(next=Bool(false))
|
val divSqrt_wen = Wire(init = false.B)
|
||||||
val divSqrt_inReady = Wire(init=Bool(false))
|
val divSqrt_inFlight = Wire(init = false.B)
|
||||||
val divSqrt_waddr = Reg(UInt(width = 5))
|
val divSqrt_waddr = Reg(UInt(width = 5))
|
||||||
val divSqrt_single = Reg(Bool())
|
val divSqrt_typeTag = Wire(UInt(width = log2Up(floatTypes.size)))
|
||||||
val divSqrt_wdata = Wire(UInt(width = fLen+1))
|
val divSqrt_wdata = Wire(UInt(width = fLen+1))
|
||||||
val divSqrt_flags = Wire(UInt(width = FPConstants.FLAGS_SZ))
|
val divSqrt_flags = Wire(UInt(width = FPConstants.FLAGS_SZ))
|
||||||
val divSqrt_in_flight = Reg(init=Bool(false))
|
|
||||||
val divSqrt_killed = Reg(Bool())
|
|
||||||
|
|
||||||
// writeback arbitration
|
// writeback arbitration
|
||||||
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult)
|
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult)
|
||||||
@ -686,7 +674,7 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
(fLen > 32).option({
|
(fLen > 32).option({
|
||||||
val dfma = Module(new FPUFMAPipe(cfg.dfmaLatency, FType.D))
|
val dfma = Module(new FPUFMAPipe(cfg.dfmaLatency, FType.D))
|
||||||
dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.singleOut
|
dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.singleOut
|
||||||
dfma.io.in.bits := req
|
dfma.io.in.bits := fuInput(Some(dfma.t))
|
||||||
Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.singleOut, dfma.io.out.bits)
|
Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.singleOut, dfma.io.out.bits)
|
||||||
})
|
})
|
||||||
def latencyMask(c: FPUCtrlSigs, offset: Int) = {
|
def latencyMask(c: FPUCtrlSigs, offset: Int) = {
|
||||||
@ -729,7 +717,7 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd)
|
val waddr = Mux(divSqrt_wen, divSqrt_waddr, wbInfo(0).rd)
|
||||||
val wdouble = Mux(divSqrt_wen, !divSqrt_single, !wbInfo(0).single)
|
val wdouble = Mux(divSqrt_wen, divSqrt_typeTag, !wbInfo(0).single)
|
||||||
val wdata = box(Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wbInfo(0).pipeid)), wdouble)
|
val wdata = box(Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wbInfo(0).pipeid)), wdouble)
|
||||||
val wexc = (pipes.map(_.res.exc): Seq[UInt])(wbInfo(0).pipeid)
|
val wexc = (pipes.map(_.res.exc): Seq[UInt])(wbInfo(0).pipeid)
|
||||||
when ((!wbInfo(0).cp && wen(0)) || divSqrt_wen) {
|
when ((!wbInfo(0).cp && wen(0)) || divSqrt_wen) {
|
||||||
@ -753,9 +741,9 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
|
Mux(divSqrt_wen, divSqrt_flags, UInt(0)) |
|
||||||
Mux(wen(0), wexc, UInt(0))
|
Mux(wen(0), wexc, UInt(0))
|
||||||
|
|
||||||
val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR)
|
val divSqrt_write_port_busy = (mem_ctrl.div || mem_ctrl.sqrt) && wen.orR
|
||||||
io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight)
|
io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_inFlight)
|
||||||
io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight
|
io.nack_mem := write_port_busy || divSqrt_write_port_busy || divSqrt_inFlight
|
||||||
io.dec <> fp_decoder.io.sigs
|
io.dec <> fp_decoder.io.sigs
|
||||||
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
|
def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_)
|
||||||
io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
|
io.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt)
|
||||||
@ -764,48 +752,61 @@ class FPU(cfg: FPUParams)(implicit p: Parameters) extends FPUModule()(p) {
|
|||||||
// we don't currently support round-max-magnitude (rm=4)
|
// we don't currently support round-max-magnitude (rm=4)
|
||||||
io.illegal_rm := io.inst(14,12).isOneOf(5, 6) || io.inst(14,12) === 7 && io.fcsr_rm >= 5
|
io.illegal_rm := io.inst(14,12).isOneOf(5, 6) || io.inst(14,12) === 7 && io.fcsr_rm >= 5
|
||||||
|
|
||||||
divSqrt_wdata := 0
|
|
||||||
divSqrt_flags := 0
|
|
||||||
if (cfg.divSqrt) {
|
if (cfg.divSqrt) {
|
||||||
require(fLen == 64)
|
val divSqrt_killed = Reg(Bool())
|
||||||
val divSqrt_rm = Reg(Bits())
|
|
||||||
val divSqrt_flags_double = Reg(Bits())
|
|
||||||
val divSqrt_wdata_double = Reg(Bits())
|
|
||||||
|
|
||||||
val divSqrt = Module(new hardfloat.DivSqrtRecF64)
|
makeDivSqrt(FType.S, wb_ctrl.singleOut)
|
||||||
divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div)
|
fLen match {
|
||||||
val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt
|
case 32 =>
|
||||||
divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_in_flight
|
case 64 => makeDivSqrt(FType.D, !wb_ctrl.singleOut)
|
||||||
|
}
|
||||||
|
|
||||||
|
def makeDivSqrt(t: FType, en: Bool) = {
|
||||||
|
val divSqrt = Module(new hardfloat.DivSqrtRecFN_small(t.exp, t.sig, 0))
|
||||||
|
divSqrt.io.inValid := en && mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_inFlight
|
||||||
divSqrt.io.sqrtOp := mem_ctrl.sqrt
|
divSqrt.io.sqrtOp := mem_ctrl.sqrt
|
||||||
divSqrt.io.a := fpiu.io.out.bits.in.in1
|
divSqrt.io.a := maxType.unsafeConvert(fpiu.io.out.bits.in.in1, t)
|
||||||
divSqrt.io.b := fpiu.io.out.bits.in.in2
|
divSqrt.io.b := maxType.unsafeConvert(fpiu.io.out.bits.in.in2, t)
|
||||||
divSqrt.io.roundingMode := fpiu.io.out.bits.in.rm
|
divSqrt.io.roundingMode := fpiu.io.out.bits.in.rm
|
||||||
divSqrt.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
divSqrt.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
||||||
|
|
||||||
when (divSqrt.io.inValid && divSqrt_inReady) {
|
when (!divSqrt.io.inReady) { divSqrt_inFlight := true } // only 1 in flight
|
||||||
divSqrt_in_flight := true
|
|
||||||
|
when (divSqrt.io.inValid && divSqrt.io.inReady) {
|
||||||
divSqrt_killed := killm
|
divSqrt_killed := killm
|
||||||
divSqrt_single := mem_ctrl.singleOut
|
|
||||||
divSqrt_waddr := mem_reg_inst(11,7)
|
divSqrt_waddr := mem_reg_inst(11,7)
|
||||||
divSqrt_rm := divSqrt.io.roundingMode
|
|
||||||
}
|
}
|
||||||
|
|
||||||
when (divSqrt_outValid) {
|
when (divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt) {
|
||||||
divSqrt_wen := !divSqrt_killed
|
divSqrt_wen := !divSqrt_killed
|
||||||
divSqrt_wdata_double := sanitizeNaN(divSqrt.io.out, FType.D)
|
divSqrt_wdata := sanitizeNaN(divSqrt.io.out, t)
|
||||||
divSqrt_in_flight := false
|
divSqrt_flags := divSqrt.io.exceptionFlags
|
||||||
divSqrt_flags_double := divSqrt.io.exceptionFlags
|
divSqrt_typeTag := typeTag(t)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24))
|
|
||||||
divSqrt_toSingle.io.in := divSqrt_wdata_double
|
|
||||||
divSqrt_toSingle.io.roundingMode := divSqrt_rm
|
|
||||||
divSqrt_toSingle.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
|
||||||
divSqrt_wdata := Mux(divSqrt_single, Cat(divSqrt_wdata_double >> divSqrt_toSingle.io.out.getWidth, sanitizeNaN(divSqrt_toSingle.io.out, FType.S)), divSqrt_wdata_double)
|
|
||||||
divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0))
|
|
||||||
} else {
|
} else {
|
||||||
when (id_ctrl.div || id_ctrl.sqrt) { io.illegal_rm := true }
|
when (id_ctrl.div || id_ctrl.sqrt) { io.illegal_rm := true }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def fuInput(minT: Option[FType]): FPInput = {
|
||||||
|
val req = Wire(new FPInput)
|
||||||
|
val tag = !ex_ctrl.singleIn // TODO typeTag
|
||||||
|
req := ex_ctrl
|
||||||
|
req.rm := ex_rm
|
||||||
|
req.in1 := unbox(ex_rs(0), tag, minT)
|
||||||
|
req.in2 := unbox(ex_rs(1), tag, minT)
|
||||||
|
req.in3 := unbox(ex_rs(2), tag, minT)
|
||||||
|
req.typ := ex_reg_inst(21,20)
|
||||||
|
req.fmaCmd := ex_reg_inst(3,2) | (!ex_ctrl.ren3 && ex_reg_inst(27))
|
||||||
|
when (ex_cp_valid) {
|
||||||
|
req := io.cp_req.bits
|
||||||
|
when (io.cp_req.bits.swap23) {
|
||||||
|
req.in2 := io.cp_req.bits.in3
|
||||||
|
req.in3 := io.cp_req.bits.in2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
req
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Mix-ins for constructing tiles that may have an FPU external to the core pipeline */
|
/** Mix-ins for constructing tiles that may have an FPU external to the core pipeline */
|
||||||
|
@ -21,23 +21,16 @@ class ShiftQueue[T <: Data](gen: T,
|
|||||||
private val valid = RegInit(UInt(0, entries))
|
private val valid = RegInit(UInt(0, entries))
|
||||||
private val elts = Reg(Vec(entries, gen))
|
private val elts = Reg(Vec(entries, gen))
|
||||||
|
|
||||||
private val do_enq = Wire(init=io.enq.fire())
|
private val do_enq = io.enq.fire()
|
||||||
private val do_deq = Wire(init=io.deq.fire())
|
private val do_deq = io.deq.fire()
|
||||||
|
|
||||||
when (do_deq) {
|
for (i <- 0 until entries) {
|
||||||
when (!do_enq) { valid := (valid >> 1) }
|
val wdata = if (i == entries-1) io.enq.bits else Mux(valid(i+1), elts(i+1), io.enq.bits)
|
||||||
for (i <- 1 until entries)
|
val shiftDown = if (i == entries-1) false.B else io.deq.ready && valid(i+1)
|
||||||
when (valid(i)) { elts(i-1) := elts(i) }
|
val enqNew = io.enq.fire() && Mux(io.deq.ready, valid(i), !valid(i) && (if (i == 0) true.B else valid(i-1)))
|
||||||
}
|
when (shiftDown || enqNew) { elts(i) := wdata }
|
||||||
when (do_enq && do_deq) {
|
|
||||||
for (i <- 0 until entries)
|
|
||||||
when (valid(i) && (if (i == entries-1) true.B else !valid(i+1))) { elts(i) := io.enq.bits }
|
|
||||||
}
|
|
||||||
when (do_enq && !do_deq) {
|
|
||||||
valid := (valid << 1) | UInt(1)
|
|
||||||
for (i <- 0 until entries)
|
|
||||||
when (!valid(i) && (if (i == 0) true.B else valid(i-1))) { elts(i) := io.enq.bits }
|
|
||||||
}
|
}
|
||||||
|
when (do_enq =/= do_deq) { valid := Mux(do_enq, (valid << 1) | UInt(1), valid >> 1) }
|
||||||
|
|
||||||
io.enq.ready := !valid(entries-1)
|
io.enq.ready := !valid(entries-1)
|
||||||
io.deq.valid := valid(0)
|
io.deq.valid := valid(0)
|
||||||
@ -45,11 +38,7 @@ class ShiftQueue[T <: Data](gen: T,
|
|||||||
|
|
||||||
if (flow) {
|
if (flow) {
|
||||||
when (io.enq.valid) { io.deq.valid := true.B }
|
when (io.enq.valid) { io.deq.valid := true.B }
|
||||||
when (!valid(0)) {
|
when (!valid(0)) { io.deq.bits := io.enq.bits }
|
||||||
io.deq.bits := io.enq.bits
|
|
||||||
do_deq := false.B
|
|
||||||
when (io.deq.ready) { do_enq := false.B }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pipe) {
|
if (pipe) {
|
||||||
|
Loading…
Reference in New Issue
Block a user