diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index fe2ab61d..a5cc132d 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -17,8 +17,8 @@ class ioCtrlDpath extends Bundle() val sel_alu2 = UFix(OUTPUT, 3); val fn_dw = Bool(OUTPUT); val fn_alu = UFix(OUTPUT, SZ_ALU_FN); - val mul_val = Bool(OUTPUT); - val mul_kill = Bool(OUTPUT) + val div_mul_val = Bool(OUTPUT) + val div_mul_kill = Bool(OUTPUT) val div_val = Bool(OUTPUT); val div_kill = Bool(OUTPUT) val sel_wa = Bool(OUTPUT); @@ -47,10 +47,7 @@ class ioCtrlDpath extends Bundle() val jalr_eq = Bool(INPUT) val ex_br_type = Bits(OUTPUT, SZ_BR) val ex_br_taken = Bool(INPUT) - val div_rdy = Bool(INPUT); - val div_result_val = Bool(INPUT); - val mul_rdy = Bool(INPUT); - val mul_result_val = Bool(INPUT); + val div_mul_rdy = Bool(INPUT) val mem_ll_wb = Bool(INPUT) val mem_ll_waddr = UFix(INPUT, 5) val ex_waddr = UFix(INPUT, 5); // write addr from execute stage @@ -359,8 +356,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val ex_reg_flush_inst = Reg(resetVal = Bool(false)) val ex_reg_jalr = Reg(resetVal = Bool(false)) val ex_reg_btb_hit = Reg(resetVal = Bool(false)) - val ex_reg_div_val = Reg(resetVal = Bool(false)) - val ex_reg_mul_val = Reg(resetVal = Bool(false)) + val ex_reg_div_mul_val = Reg(resetVal = Bool(false)) val ex_reg_mem_val = Reg(resetVal = Bool(false)) val ex_reg_xcpt = Reg(resetVal = Bool(false)) val ex_reg_fp_val = Reg(resetVal = Bool(false)) @@ -379,8 +375,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val mem_reg_wen = Reg(resetVal = Bool(false)) val mem_reg_fp_wen = Reg(resetVal = Bool(false)) val mem_reg_flush_inst = Reg(resetVal = Bool(false)) - val mem_reg_div_val = Reg(resetVal = Bool(false)) - val mem_reg_mul_val = Reg(resetVal = Bool(false)) + val mem_reg_div_mul_val = Reg(resetVal = Bool(false)) val mem_reg_mem_val = Reg(resetVal = Bool(false)) val mem_reg_xcpt = Reg(resetVal = Bool(false)) val mem_reg_fp_val = Reg(resetVal = Bool(false)) @@ -479,8 +474,7 @@ class Control(implicit conf: RocketConfiguration) extends Component when (ctrl_killd) { ex_reg_jalr := Bool(false) ex_reg_btb_hit := Bool(false); - ex_reg_div_val := Bool(false); - ex_reg_mul_val := Bool(false); + ex_reg_div_mul_val := Bool(false) ex_reg_mem_val := Bool(false); ex_reg_valid := Bool(false); ex_reg_wen := Bool(false); @@ -499,8 +493,7 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_br_type := id_br_type; ex_reg_jalr := id_jalr ex_reg_btb_hit := io.imem.resp.bits.taken - ex_reg_div_val := id_div_val - ex_reg_mul_val := id_mul_val + ex_reg_div_mul_val := id_mul_val || id_div_val ex_reg_mem_val := id_mem_val.toBool; ex_reg_valid := Bool(true) ex_reg_pcr := id_pcr @@ -521,8 +514,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val wb_dcache_miss = wb_reg_mem_val && !io.dmem.resp.valid val replay_ex = wb_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || ex_reg_mem_val && !io.dmem.req.ready || - ex_reg_div_val && !io.dpath.div_rdy || - ex_reg_mul_val && !io.dpath.mul_rdy || + ex_reg_div_mul_val && !io.dpath.div_mul_rdy || mem_reg_replay_next ctrl_killx := take_pc_wb || replay_ex @@ -535,8 +527,7 @@ class Control(implicit conf: RocketConfiguration) extends Component mem_reg_replay := replay_ex && !take_pc_wb; mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb when (ex_xcpt) { mem_reg_cause := ex_cause } - mem_reg_div_val := ex_reg_div_val && io.dpath.div_rdy - mem_reg_mul_val := ex_reg_mul_val && io.dpath.mul_rdy + mem_reg_div_mul_val := ex_reg_div_mul_val && io.dpath.div_mul_rdy when (ctrl_killx) { mem_reg_valid := Bool(false); @@ -603,7 +594,7 @@ class Control(implicit conf: RocketConfiguration) extends Component wb_reg_eret := mem_reg_eret && !mem_reg_replay wb_reg_flush_inst := mem_reg_flush_inst; wb_reg_mem_val := mem_reg_mem_val - wb_reg_div_mul_val := mem_reg_div_val || mem_reg_mul_val + wb_reg_div_mul_val := mem_reg_div_mul_val wb_reg_fp_val := mem_reg_fp_val wb_reg_replay_next := mem_reg_replay_next } @@ -674,7 +665,7 @@ class Control(implicit conf: RocketConfiguration) extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr || io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) - val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_val || ex_reg_mul_val || ex_reg_fp_val) || + val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val) || fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) // stall for RAW/WAW hazards on LB/LH and mul/div in memory stage. @@ -691,7 +682,7 @@ class Control(implicit conf: RocketConfiguration) extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_val || mem_reg_mul_val || mem_reg_fp_val) || + val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) || fp_data_hazard_mem && mem_reg_fp_val id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) @@ -731,10 +722,8 @@ class Control(implicit conf: RocketConfiguration) extends Component io.dpath.sel_alu2 := id_sel_alu2.toUFix io.dpath.fn_dw := id_fn_dw.toBool; io.dpath.fn_alu := id_fn_alu.toUFix - io.dpath.div_val := ex_reg_div_val - io.dpath.div_kill := mem_reg_div_val && killm_common - io.dpath.mul_val := ex_reg_mul_val - io.dpath.mul_kill := mem_reg_mul_val && killm_common + io.dpath.div_mul_val := ex_reg_div_mul_val + io.dpath.div_mul_kill := mem_reg_div_mul_val && killm_common io.dpath.ex_fp_val:= ex_reg_fp_val; io.dpath.mem_fp_val:= mem_reg_fp_val; io.dpath.ex_jalr := ex_reg_jalr diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index f7e97185..a15c284f 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -4,62 +4,83 @@ import Chisel._ import Node._ import Constants._ import ALU._ +import Util._ -class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { +class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { val io = new MultiplierIO val w = io.req.bits.in1.getWidth + val mulw = (w+1+mulUnroll-1)/mulUnroll*mulUnroll - val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() }; + val s_ready :: s_neg_inputs :: s_mul_busy :: s_div_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(7) { UFix() }; val state = Reg(resetVal = s_ready); - val count = Reg() { UFix(width = log2Up(w+1)) } - val divby0 = Reg() { Bool() }; - val neg_quo = Reg() { Bool() }; - val neg_rem = Reg() { Bool() }; - val rem = Reg() { Bool() }; - val half = Reg() { Bool() }; - val r_req = Reg{io.req.bits.clone} - - val divisor = Reg() { Bits() } - val remainder = Reg() { Bits(width = 2*w+1) } - val subtractor = remainder(2*w,w) - divisor - - val dw = io.req.bits.dw - val fn = io.req.bits.fn - val tc = isMulFN(fn, FN_DIV) || isMulFN(fn, FN_REM) + val req = Reg{io.req.bits.clone} + val count = Reg{UFix(width = log2Up(w+1))} + val divby0 = Reg{Bool()} + val neg_out = Reg{Bool()} + val divisor = Reg{Bits(width = w+1)} // div only needs w bits + val remainder = Reg{Bits(width = 2*mulw+1)} // div only needs 2*w+1 bits - val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w-1), io.req.bits.in1(w/2-1)) - val lhs_hi = Mux(dw === DW_64, io.req.bits.in1(w-1,w/2), Fill(w/2, lhs_sign)) - val lhs_in = Cat(lhs_hi, io.req.bits.in1(w/2-1,0)) - - val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in2(w-1), io.req.bits.in2(w/2-1)) - val rhs_hi = Mux(dw === DW_64, io.req.bits.in2(w-1,w/2), Fill(w/2, rhs_sign)) - val rhs_in = Cat(rhs_hi, io.req.bits.in2(w/2-1,0)) + def sext(x: Bits, cmds: Vec[Bits]) = { + val sign = Mux(io.req.bits.dw === DW_64, x(w-1), x(w/2-1)) && cmds.contains(io.req.bits.fn) + val hi = Mux(io.req.bits.dw === DW_64, x(w-1,w/2), Fill(w/2, sign)) + (Cat(hi, x(w/2-1,0)), sign) + } + val (lhs_in, lhs_sign) = sext(io.req.bits.in1, AVec(FN_DIV, FN_REM, FN_MULH, FN_MULHSU)) + val (rhs_in, rhs_sign) = sext(io.req.bits.in2, AVec(FN_DIV, FN_REM, FN_MULH)) + val subtractor = remainder(2*w,w) - divisor(w-1,0) + when (state === s_neg_inputs) { - state := s_busy + state := s_div_busy when (remainder(w-1)) { - remainder := Cat(remainder(2*w, w), -remainder(w-1,0)) + remainder := -remainder(w-1,0) } - when (divisor(w-1)) { + when (divisor(w-1) && !AVec(FN_MULHU, FN_MULHSU).contains(req.fn)) { divisor := subtractor(w-1,0) } } - when (state === s_neg_outputs) { + when (state === s_neg_output) { + remainder := -remainder(w-1,0) state := s_done - when (neg_rem && neg_quo && !divby0) { - remainder := Cat(-remainder(2*w, w+1), remainder(w), -remainder(w-1,0)) + } + when (state === s_move_rem) { + remainder := remainder(2*w, w+1) + state := Mux(neg_out, s_neg_output, s_done) + } + when (state === s_mul_busy) { + val carryIn = remainder(w) + val mplier = Cat(remainder(2*mulw,w+1),remainder(w-1,0)).toFix + val mpcand = divisor.toFix + val prod = mplier(mulUnroll-1,0) * mpcand + Mux(carryIn, mpcand, Fix(0)) + val sum = Cat(mplier(2*mulw-1,mulw) + prod, mplier(mulw-1,mulUnroll)) + val carryOut = mplier(mulUnroll-1) + remainder := Cat(sum(sum.getWidth-1,w), carryOut, sum(w-1,0)).toFix + + val cycles = mulw/mulUnroll + val shift1 = (UFix(cycles)-count)*mulUnroll + val shift = shift1(log2Up(w)-1,0) + val mask = (UFix(1) << shift) - 1 + val eOut = shift1 < w && !((mplier(w-1,0).toBits ^ carryIn.toFix) & mask).orR + val shifted = mplier >> shift + when (Bool(earlyOut) && eOut) { + remainder := Cat(shifted(sum.getWidth-1,w), carryOut, shifted(w-1,0)).toFix } - .elsewhen (neg_quo && !divby0) { - remainder := Cat(remainder(2*w, w), -remainder(w-1,0)) - } - .elsewhen (neg_rem) { - remainder := Cat(-remainder(2*w, w+1), remainder(w,0)) + + count := count + 1 + when (count === cycles-1 || Bool(earlyOut) && eOut) { + state := s_done + when (AVec(FN_MULH, FN_MULHU, FN_MULHSU) contains req.fn) { + state := s_move_rem + } } } - when (state === s_busy) { + when (state === s_div_busy) { when (count === UFix(w)) { - state := Mux(neg_quo || neg_rem, s_neg_outputs, s_done) + state := Mux(neg_out && !divby0, s_neg_output, s_done) + when (AVec(FN_REM, FN_REMU) contains req.fn) { + state := s_move_rem + } } count := count + UFix(1) @@ -69,13 +90,104 @@ class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) ext val divisorMSB = Log2(divisor, w) val dividendMSB = Log2(remainder(w-1,0), w) - val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB - val eOut = count === UFix(0) && eOutPos > dividendMSB && (divisorMSB != UFix(0) || divisor(0)) + val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB - dividendMSB + val eOut = count === UFix(0) && eOutPos > 0 && (divisorMSB != UFix(0) || divisor(0)) when (Bool(earlyOut) && eOut) { - val eOutDist = eOutPos - dividendMSB - val shift = Mux(divisorMSB >= dividendMSB, UFix(w-1), eOutDist(log2Up(w)-1,0)) + val shift = eOutPos(log2Up(w)-1,0) remainder := remainder(w-1,0) << shift count := shift + when (eOutPos(log2Up(w))) { + remainder := remainder(w-1,0) << w-1 + count := w-1 + } + } + } + when (io.resp.fire() || io.kill) { + state := s_ready + } + when (io.req.fire()) { + val isMul = AVec(FN_MUL, FN_MULH, FN_MULHU, FN_MULHSU).contains(io.req.bits.fn) + val isRem = AVec(FN_REM, FN_REMU).contains(io.req.bits.fn) + state := Mux(isMul, s_mul_busy, Mux(lhs_sign || rhs_sign, s_neg_inputs, s_div_busy)) + count := UFix(0) + neg_out := !isMul && Mux(isRem, lhs_sign, lhs_sign != rhs_sign) + divby0 := true + divisor := Cat(rhs_sign, rhs_in) + remainder := Cat(Fill(mulw-w, isMul && lhs_sign), Bool(false), lhs_in) + req := io.req.bits + } + + io.resp.bits := req + io.resp.bits.data := Mux(req.dw === DW_32, Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0)) + io.resp.valid := state === s_done + io.req.ready := state === s_ready +} + +class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { + val io = new MultiplierIO + val w = io.req.bits.in1.getWidth + + val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(6) { UFix() }; + val state = Reg(resetVal = s_ready); + + val count = Reg() { UFix(width = log2Up(w+1)) } + val divby0 = Reg() { Bool() }; + val neg_out = Reg() { Bool() }; + val r_req = Reg{io.req.bits.clone} + + val divisor = Reg() { Bits() } + val remainder = Reg() { Bits(width = 2*w+1) } + val subtractor = remainder(2*w,w) - divisor + + def sext(x: Bits, cmds: Vec[Bits]) = { + val sign = Mux(io.req.bits.dw === DW_64, x(w-1), x(w/2-1)) && cmds.contains(io.req.bits.fn) + val hi = Mux(io.req.bits.dw === DW_64, x(w-1,w/2), Fill(w/2, sign)) + (Cat(hi, x(w/2-1,0)), sign) + } + val (lhs_in, lhs_sign) = sext(io.req.bits.in1, AVec(FN_DIV, FN_REM)) + val (rhs_in, rhs_sign) = sext(io.req.bits.in2, AVec(FN_DIV, FN_REM)) + + val r_isRem = isMulFN(r_req.fn, FN_REM) || isMulFN(r_req.fn, FN_REMU) + + when (state === s_neg_inputs) { + state := s_busy + when (remainder(w-1)) { + remainder := -remainder(w-1,0) + } + when (divisor(w-1)) { + divisor := subtractor(w-1,0) + } + } + when (state === s_neg_output) { + remainder := -remainder(w-1,0) + state := s_done + } + when (state === s_move_rem) { + remainder := remainder(2*w, w+1) + state := Mux(neg_out, s_neg_output, s_done) + } + when (state === s_busy) { + when (count === UFix(w)) { + state := Mux(r_isRem, s_move_rem, Mux(neg_out && !divby0, s_neg_output, s_done)) + } + count := count + UFix(1) + + val msb = subtractor(w) + divby0 := divby0 && !msb + remainder := Cat(Mux(msb, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !msb) + + val divisorMSB = Log2(divisor, w) + val dividendMSB = Log2(remainder(w-1,0), w) + val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB - dividendMSB + val eOut = count === UFix(0) && eOutPos > 0 && (divisorMSB != UFix(0) || divisor(0)) + when (Bool(earlyOut) && eOut) { + val shift = eOutPos(log2Up(w)-1,0) + remainder := remainder(w-1,0) << shift + count := shift + when (eOutPos(log2Up(w))) { + remainder := remainder(w-1,0) << w-1 + count := w-1 + } } } when (io.resp.fire() || io.kill) { @@ -84,20 +196,15 @@ class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) ext when (io.req.fire()) { state := Mux(lhs_sign || rhs_sign, s_neg_inputs, s_busy) count := UFix(0) - half := (dw === DW_32); - neg_quo := lhs_sign != rhs_sign - neg_rem := lhs_sign - rem := isMulFN(fn, FN_REM) || isMulFN(fn, FN_REMU) - divby0 := Bool(true); + neg_out := Mux(AVec(FN_REM, FN_REMU).contains(io.req.bits.fn), lhs_sign, lhs_sign != rhs_sign) + divby0 := true divisor := rhs_in remainder := lhs_in r_req := io.req.bits } - val result = Mux(rem, remainder(w+w, w+1), remainder(w-1,0)) - io.resp.bits := r_req - io.resp.bits.data := Mux(half, Cat(Fill(w/2, result(w/2-1)), result(w/2-1,0)), result) + io.resp.bits.data := Mux(r_req.dw === DW_32, Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0)) io.resp.valid := state === s_done io.req.ready := state === s_ready } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 3ceca7e8..ee6630e5 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -158,31 +158,17 @@ class Datapath(implicit conf: RocketConfiguration) extends Component alu.io.in2 := ex_op2.toUFix alu.io.in1 := ex_rs1.toUFix - // divider - val div = new Divider(earlyOut = true) - div.io.req.valid := io.ctrl.div_val + // multiplier and divider + val div = new MulDiv(mulUnroll = 4, earlyOut = true) + div.io.req.valid := io.ctrl.div_mul_val div.io.req.bits.dw := ex_reg_ctrl_fn_dw div.io.req.bits.fn := ex_reg_ctrl_fn_alu div.io.req.bits.in1 := ex_rs1 div.io.req.bits.in2 := ex_rs2 div.io.req.bits.tag := ex_reg_waddr - div.io.kill := io.ctrl.div_kill + div.io.kill := io.ctrl.div_mul_kill div.io.resp.ready := Bool(true) - io.ctrl.div_rdy := div.io.req.ready - io.ctrl.div_result_val := div.io.resp.valid - - // multiplier - val mul = new Multiplier(unroll = 4, earlyOut = true) - mul.io.req.valid := io.ctrl.mul_val - mul.io.req.bits.dw := ex_reg_ctrl_fn_dw - mul.io.req.bits.fn := ex_reg_ctrl_fn_alu - mul.io.req.bits.in1 := ex_rs1 - mul.io.req.bits.in2 := ex_rs2 - mul.io.req.bits.tag := ex_reg_waddr - mul.io.kill := io.ctrl.mul_kill - mul.io.resp.ready := Bool(true) - io.ctrl.mul_rdy := mul.io.req.ready - io.ctrl.mul_result_val := mul.io.resp.valid + io.ctrl.div_mul_rdy := div.io.req.ready io.fpu.fromint_data := ex_rs1 io.ctrl.ex_waddr := ex_reg_waddr @@ -266,17 +252,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu val mem_ll_wdata = Bits() - mem_ll_wdata := mul.io.resp.bits.data - io.ctrl.mem_ll_waddr := mul.io.resp.bits.tag - io.ctrl.mem_ll_wb := mul.io.resp.valid - when (div.io.resp.valid) { - mul.io.resp.ready := Bool(false) - mem_ll_wdata := div.io.resp.bits.data - io.ctrl.mem_ll_waddr := div.io.resp.bits.tag - io.ctrl.mem_ll_wb := Bool(true) - } + mem_ll_wdata := div.io.resp.bits.data + io.ctrl.mem_ll_waddr := div.io.resp.bits.tag + io.ctrl.mem_ll_wb := div.io.resp.valid when (dmem_resp_replay) { - mul.io.resp.ready := Bool(false) div.io.resp.ready := Bool(false) mem_ll_wdata := io.dmem.resp.bits.data_subword io.ctrl.mem_ll_waddr := dmem_resp_waddr diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 14ec1426..5a29ab20 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -9,17 +9,17 @@ object ALU { val SZ_ALU_FN = 4 val FN_X = Bits("b????") - val FN_ADD = UFix(0) - val FN_SL = UFix(1) - val FN_XOR = UFix(4) - val FN_OR = UFix(6) - val FN_AND = UFix(7) - val FN_SR = UFix(5) - val FN_SUB = UFix(8) - val FN_SLT = UFix(10) - val FN_SLTU = UFix(11) - val FN_SRA = UFix(13) - val FN_OP2 = UFix(15) + val FN_ADD = Bits(0) + val FN_SL = Bits(1) + val FN_XOR = Bits(4) + val FN_OR = Bits(6) + val FN_AND = Bits(7) + val FN_SR = Bits(5) + val FN_SUB = Bits(8) + val FN_SLT = Bits(10) + val FN_SLTU = Bits(11) + val FN_SRA = Bits(13) + val FN_OP2 = Bits(15) val FN_DIV = FN_XOR val FN_DIVU = FN_SR