1
0

merge multiplier and divider

This commit is contained in:
Andrew Waterman 2012-12-12 02:22:47 -08:00
parent c921fc34a9
commit 05f19b21d0
4 changed files with 189 additions and 114 deletions

View File

@ -17,8 +17,8 @@ class ioCtrlDpath extends Bundle()
val sel_alu2 = UFix(OUTPUT, 3);
val fn_dw = Bool(OUTPUT);
val fn_alu = UFix(OUTPUT, SZ_ALU_FN);
val mul_val = Bool(OUTPUT);
val mul_kill = Bool(OUTPUT)
val div_mul_val = Bool(OUTPUT)
val div_mul_kill = Bool(OUTPUT)
val div_val = Bool(OUTPUT);
val div_kill = Bool(OUTPUT)
val sel_wa = Bool(OUTPUT);
@ -47,10 +47,7 @@ class ioCtrlDpath extends Bundle()
val jalr_eq = Bool(INPUT)
val ex_br_type = Bits(OUTPUT, SZ_BR)
val ex_br_taken = Bool(INPUT)
val div_rdy = Bool(INPUT);
val div_result_val = Bool(INPUT);
val mul_rdy = Bool(INPUT);
val mul_result_val = Bool(INPUT);
val div_mul_rdy = Bool(INPUT)
val mem_ll_wb = Bool(INPUT)
val mem_ll_waddr = UFix(INPUT, 5)
val ex_waddr = UFix(INPUT, 5); // write addr from execute stage
@ -359,8 +356,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
val ex_reg_flush_inst = Reg(resetVal = Bool(false))
val ex_reg_jalr = Reg(resetVal = Bool(false))
val ex_reg_btb_hit = Reg(resetVal = Bool(false))
val ex_reg_div_val = Reg(resetVal = Bool(false))
val ex_reg_mul_val = Reg(resetVal = Bool(false))
val ex_reg_div_mul_val = Reg(resetVal = Bool(false))
val ex_reg_mem_val = Reg(resetVal = Bool(false))
val ex_reg_xcpt = Reg(resetVal = Bool(false))
val ex_reg_fp_val = Reg(resetVal = Bool(false))
@ -379,8 +375,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
val mem_reg_wen = Reg(resetVal = Bool(false))
val mem_reg_fp_wen = Reg(resetVal = Bool(false))
val mem_reg_flush_inst = Reg(resetVal = Bool(false))
val mem_reg_div_val = Reg(resetVal = Bool(false))
val mem_reg_mul_val = Reg(resetVal = Bool(false))
val mem_reg_div_mul_val = Reg(resetVal = Bool(false))
val mem_reg_mem_val = Reg(resetVal = Bool(false))
val mem_reg_xcpt = Reg(resetVal = Bool(false))
val mem_reg_fp_val = Reg(resetVal = Bool(false))
@ -479,8 +474,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
when (ctrl_killd) {
ex_reg_jalr := Bool(false)
ex_reg_btb_hit := Bool(false);
ex_reg_div_val := Bool(false);
ex_reg_mul_val := Bool(false);
ex_reg_div_mul_val := Bool(false)
ex_reg_mem_val := Bool(false);
ex_reg_valid := Bool(false);
ex_reg_wen := Bool(false);
@ -499,8 +493,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
ex_reg_br_type := id_br_type;
ex_reg_jalr := id_jalr
ex_reg_btb_hit := io.imem.resp.bits.taken
ex_reg_div_val := id_div_val
ex_reg_mul_val := id_mul_val
ex_reg_div_mul_val := id_mul_val || id_div_val
ex_reg_mem_val := id_mem_val.toBool;
ex_reg_valid := Bool(true)
ex_reg_pcr := id_pcr
@ -521,8 +514,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
val wb_dcache_miss = wb_reg_mem_val && !io.dmem.resp.valid
val replay_ex = wb_dcache_miss && ex_reg_load_use || mem_reg_flush_inst ||
ex_reg_mem_val && !io.dmem.req.ready ||
ex_reg_div_val && !io.dpath.div_rdy ||
ex_reg_mul_val && !io.dpath.mul_rdy ||
ex_reg_div_mul_val && !io.dpath.div_mul_rdy ||
mem_reg_replay_next
ctrl_killx := take_pc_wb || replay_ex
@ -535,8 +527,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
mem_reg_replay := replay_ex && !take_pc_wb;
mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb
when (ex_xcpt) { mem_reg_cause := ex_cause }
mem_reg_div_val := ex_reg_div_val && io.dpath.div_rdy
mem_reg_mul_val := ex_reg_mul_val && io.dpath.mul_rdy
mem_reg_div_mul_val := ex_reg_div_mul_val && io.dpath.div_mul_rdy
when (ctrl_killx) {
mem_reg_valid := Bool(false);
@ -603,7 +594,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
wb_reg_eret := mem_reg_eret && !mem_reg_replay
wb_reg_flush_inst := mem_reg_flush_inst;
wb_reg_mem_val := mem_reg_mem_val
wb_reg_div_mul_val := mem_reg_div_val || mem_reg_mul_val
wb_reg_div_mul_val := mem_reg_div_mul_val
wb_reg_fp_val := mem_reg_fp_val
wb_reg_replay_next := mem_reg_replay_next
}
@ -674,7 +665,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr ||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr ||
io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr)
val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_val || ex_reg_mul_val || ex_reg_fp_val) ||
val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val) ||
fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val)
// stall for RAW/WAW hazards on LB/LH and mul/div in memory stage.
@ -691,7 +682,7 @@ class Control(implicit conf: RocketConfiguration) extends Component
io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr ||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr ||
io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr)
val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_val || mem_reg_mul_val || mem_reg_fp_val) ||
val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) ||
fp_data_hazard_mem && mem_reg_fp_val
id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem)
@ -731,10 +722,8 @@ class Control(implicit conf: RocketConfiguration) extends Component
io.dpath.sel_alu2 := id_sel_alu2.toUFix
io.dpath.fn_dw := id_fn_dw.toBool;
io.dpath.fn_alu := id_fn_alu.toUFix
io.dpath.div_val := ex_reg_div_val
io.dpath.div_kill := mem_reg_div_val && killm_common
io.dpath.mul_val := ex_reg_mul_val
io.dpath.mul_kill := mem_reg_mul_val && killm_common
io.dpath.div_mul_val := ex_reg_div_mul_val
io.dpath.div_mul_kill := mem_reg_div_mul_val && killm_common
io.dpath.ex_fp_val:= ex_reg_fp_val;
io.dpath.mem_fp_val:= mem_reg_fp_val;
io.dpath.ex_jalr := ex_reg_jalr

View File

@ -4,62 +4,83 @@ import Chisel._
import Node._
import Constants._
import ALU._
import Util._
class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component {
class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component {
val io = new MultiplierIO
val w = io.req.bits.in1.getWidth
val mulw = (w+1+mulUnroll-1)/mulUnroll*mulUnroll
val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() };
val s_ready :: s_neg_inputs :: s_mul_busy :: s_div_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(7) { UFix() };
val state = Reg(resetVal = s_ready);
val count = Reg() { UFix(width = log2Up(w+1)) }
val divby0 = Reg() { Bool() };
val neg_quo = Reg() { Bool() };
val neg_rem = Reg() { Bool() };
val rem = Reg() { Bool() };
val half = Reg() { Bool() };
val r_req = Reg{io.req.bits.clone}
val divisor = Reg() { Bits() }
val remainder = Reg() { Bits(width = 2*w+1) }
val subtractor = remainder(2*w,w) - divisor
val dw = io.req.bits.dw
val fn = io.req.bits.fn
val tc = isMulFN(fn, FN_DIV) || isMulFN(fn, FN_REM)
val req = Reg{io.req.bits.clone}
val count = Reg{UFix(width = log2Up(w+1))}
val divby0 = Reg{Bool()}
val neg_out = Reg{Bool()}
val divisor = Reg{Bits(width = w+1)} // div only needs w bits
val remainder = Reg{Bits(width = 2*mulw+1)} // div only needs 2*w+1 bits
val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w-1), io.req.bits.in1(w/2-1))
val lhs_hi = Mux(dw === DW_64, io.req.bits.in1(w-1,w/2), Fill(w/2, lhs_sign))
val lhs_in = Cat(lhs_hi, io.req.bits.in1(w/2-1,0))
val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in2(w-1), io.req.bits.in2(w/2-1))
val rhs_hi = Mux(dw === DW_64, io.req.bits.in2(w-1,w/2), Fill(w/2, rhs_sign))
val rhs_in = Cat(rhs_hi, io.req.bits.in2(w/2-1,0))
// Widens a request operand to the full w-bit datapath according to the request's size field.
//   x    - operand as supplied on the request
//   cmds - function codes under which x is to be read as a signed value
// Returns (extended operand, sign). sign is x's top bit (bit w-1 when dw is DW_64, bit w/2-1
// otherwise) qualified by the request's fn being one of cmds.
def sext(x: Bits, cmds: Vec[Bits]) = {
  val is64 = io.req.bits.dw === DW_64
  val topBit = Mux(is64, x(w-1), x(w/2-1))
  val sign = topBit && cmds.contains(io.req.bits.fn)
  // For non-DW_64 requests the operand's own upper half is dropped and replaced by copies
  // of sign, which is all zeros whenever fn is not listed in cmds.
  val upper = Mux(is64, x(w-1,w/2), Fill(w/2, sign))
  val lower = x(w/2-1,0)
  (Cat(upper, lower), sign)
}
val (lhs_in, lhs_sign) = sext(io.req.bits.in1, AVec(FN_DIV, FN_REM, FN_MULH, FN_MULHSU))
val (rhs_in, rhs_sign) = sext(io.req.bits.in2, AVec(FN_DIV, FN_REM, FN_MULH))
val subtractor = remainder(2*w,w) - divisor(w-1,0)
when (state === s_neg_inputs) {
state := s_busy
state := s_div_busy
when (remainder(w-1)) {
remainder := Cat(remainder(2*w, w), -remainder(w-1,0))
remainder := -remainder(w-1,0)
}
when (divisor(w-1)) {
when (divisor(w-1) && !AVec(FN_MULHU, FN_MULHSU).contains(req.fn)) {
divisor := subtractor(w-1,0)
}
}
when (state === s_neg_outputs) {
when (state === s_neg_output) {
remainder := -remainder(w-1,0)
state := s_done
when (neg_rem && neg_quo && !divby0) {
remainder := Cat(-remainder(2*w, w+1), remainder(w), -remainder(w-1,0))
}
when (state === s_move_rem) {
remainder := remainder(2*w, w+1)
state := Mux(neg_out, s_neg_output, s_done)
}
when (state === s_mul_busy) {
val carryIn = remainder(w)
val mplier = Cat(remainder(2*mulw,w+1),remainder(w-1,0)).toFix
val mpcand = divisor.toFix
val prod = mplier(mulUnroll-1,0) * mpcand + Mux(carryIn, mpcand, Fix(0))
val sum = Cat(mplier(2*mulw-1,mulw) + prod, mplier(mulw-1,mulUnroll))
val carryOut = mplier(mulUnroll-1)
remainder := Cat(sum(sum.getWidth-1,w), carryOut, sum(w-1,0)).toFix
val cycles = mulw/mulUnroll
val shift1 = (UFix(cycles)-count)*mulUnroll
val shift = shift1(log2Up(w)-1,0)
val mask = (UFix(1) << shift) - 1
val eOut = shift1 < w && !((mplier(w-1,0).toBits ^ carryIn.toFix) & mask).orR
val shifted = mplier >> shift
when (Bool(earlyOut) && eOut) {
remainder := Cat(shifted(sum.getWidth-1,w), carryOut, shifted(w-1,0)).toFix
}
.elsewhen (neg_quo && !divby0) {
remainder := Cat(remainder(2*w, w), -remainder(w-1,0))
}
.elsewhen (neg_rem) {
remainder := Cat(-remainder(2*w, w+1), remainder(w,0))
count := count + 1
when (count === cycles-1 || Bool(earlyOut) && eOut) {
state := s_done
when (AVec(FN_MULH, FN_MULHU, FN_MULHSU) contains req.fn) {
state := s_move_rem
}
}
}
when (state === s_busy) {
when (state === s_div_busy) {
when (count === UFix(w)) {
state := Mux(neg_quo || neg_rem, s_neg_outputs, s_done)
state := Mux(neg_out && !divby0, s_neg_output, s_done)
when (AVec(FN_REM, FN_REMU) contains req.fn) {
state := s_move_rem
}
}
count := count + UFix(1)
@ -69,13 +90,104 @@ class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) ext
val divisorMSB = Log2(divisor, w)
val dividendMSB = Log2(remainder(w-1,0), w)
val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB
val eOut = count === UFix(0) && eOutPos > dividendMSB && (divisorMSB != UFix(0) || divisor(0))
val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB - dividendMSB
val eOut = count === UFix(0) && eOutPos > 0 && (divisorMSB != UFix(0) || divisor(0))
when (Bool(earlyOut) && eOut) {
val eOutDist = eOutPos - dividendMSB
val shift = Mux(divisorMSB >= dividendMSB, UFix(w-1), eOutDist(log2Up(w)-1,0))
val shift = eOutPos(log2Up(w)-1,0)
remainder := remainder(w-1,0) << shift
count := shift
when (eOutPos(log2Up(w))) {
remainder := remainder(w-1,0) << w-1
count := w-1
}
}
}
when (io.resp.fire() || io.kill) {
state := s_ready
}
when (io.req.fire()) {
val isMul = AVec(FN_MUL, FN_MULH, FN_MULHU, FN_MULHSU).contains(io.req.bits.fn)
val isRem = AVec(FN_REM, FN_REMU).contains(io.req.bits.fn)
state := Mux(isMul, s_mul_busy, Mux(lhs_sign || rhs_sign, s_neg_inputs, s_div_busy))
count := UFix(0)
neg_out := !isMul && Mux(isRem, lhs_sign, lhs_sign != rhs_sign)
divby0 := true
divisor := Cat(rhs_sign, rhs_in)
remainder := Cat(Fill(mulw-w, isMul && lhs_sign), Bool(false), lhs_in)
req := io.req.bits
}
io.resp.bits := req
io.resp.bits.data := Mux(req.dw === DW_32, Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0))
io.resp.valid := state === s_done
io.req.ready := state === s_ready
}
class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component {
val io = new MultiplierIO
val w = io.req.bits.in1.getWidth
val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(6) { UFix() };
val state = Reg(resetVal = s_ready);
val count = Reg() { UFix(width = log2Up(w+1)) }
val divby0 = Reg() { Bool() };
val neg_out = Reg() { Bool() };
val r_req = Reg{io.req.bits.clone}
val divisor = Reg() { Bits() }
val remainder = Reg() { Bits(width = 2*w+1) }
val subtractor = remainder(2*w,w) - divisor
// Produces the w-bit form of a divider operand together with its effective sign.
//   x    - operand as supplied on the request
//   cmds - function codes for which x counts as signed
// The returned pair is (extended operand, sign); sign is only ever set when the request's fn
// appears in cmds and the selected top bit of x is 1.
def sext(x: Bits, cmds: Vec[Bits]) = {
  val wide = io.req.bits.dw === DW_64
  val signedOp = cmds.contains(io.req.bits.fn)
  // The top bit sits at w-1 for DW_64 requests and at w/2-1 for every other size.
  val sign = Mux(wide, x(w-1), x(w/2-1)) && signedOp
  // Anything other than DW_64 has its upper half overwritten with w/2 copies of sign.
  val hiHalf = Mux(wide, x(w-1,w/2), Fill(w/2, sign))
  val loHalf = x(w/2-1,0)
  (Cat(hiHalf, loHalf), sign)
}
val (lhs_in, lhs_sign) = sext(io.req.bits.in1, AVec(FN_DIV, FN_REM))
val (rhs_in, rhs_sign) = sext(io.req.bits.in2, AVec(FN_DIV, FN_REM))
val r_isRem = isMulFN(r_req.fn, FN_REM) || isMulFN(r_req.fn, FN_REMU)
when (state === s_neg_inputs) {
state := s_busy
when (remainder(w-1)) {
remainder := -remainder(w-1,0)
}
when (divisor(w-1)) {
divisor := subtractor(w-1,0)
}
}
when (state === s_neg_output) {
remainder := -remainder(w-1,0)
state := s_done
}
when (state === s_move_rem) {
remainder := remainder(2*w, w+1)
state := Mux(neg_out, s_neg_output, s_done)
}
when (state === s_busy) {
when (count === UFix(w)) {
state := Mux(r_isRem, s_move_rem, Mux(neg_out && !divby0, s_neg_output, s_done))
}
count := count + UFix(1)
val msb = subtractor(w)
divby0 := divby0 && !msb
remainder := Cat(Mux(msb, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !msb)
val divisorMSB = Log2(divisor, w)
val dividendMSB = Log2(remainder(w-1,0), w)
val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB - dividendMSB
val eOut = count === UFix(0) && eOutPos > 0 && (divisorMSB != UFix(0) || divisor(0))
when (Bool(earlyOut) && eOut) {
val shift = eOutPos(log2Up(w)-1,0)
remainder := remainder(w-1,0) << shift
count := shift
when (eOutPos(log2Up(w))) {
remainder := remainder(w-1,0) << w-1
count := w-1
}
}
}
when (io.resp.fire() || io.kill) {
@ -84,20 +196,15 @@ class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) ext
when (io.req.fire()) {
state := Mux(lhs_sign || rhs_sign, s_neg_inputs, s_busy)
count := UFix(0)
half := (dw === DW_32);
neg_quo := lhs_sign != rhs_sign
neg_rem := lhs_sign
rem := isMulFN(fn, FN_REM) || isMulFN(fn, FN_REMU)
divby0 := Bool(true);
neg_out := Mux(AVec(FN_REM, FN_REMU).contains(io.req.bits.fn), lhs_sign, lhs_sign != rhs_sign)
divby0 := true
divisor := rhs_in
remainder := lhs_in
r_req := io.req.bits
}
val result = Mux(rem, remainder(w+w, w+1), remainder(w-1,0))
io.resp.bits := r_req
io.resp.bits.data := Mux(half, Cat(Fill(w/2, result(w/2-1)), result(w/2-1,0)), result)
io.resp.bits.data := Mux(r_req.dw === DW_32, Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0))
io.resp.valid := state === s_done
io.req.ready := state === s_ready
}

View File

@ -158,31 +158,17 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
alu.io.in2 := ex_op2.toUFix
alu.io.in1 := ex_rs1.toUFix
// divider
val div = new Divider(earlyOut = true)
div.io.req.valid := io.ctrl.div_val
// multiplier and divider
val div = new MulDiv(mulUnroll = 4, earlyOut = true)
div.io.req.valid := io.ctrl.div_mul_val
div.io.req.bits.dw := ex_reg_ctrl_fn_dw
div.io.req.bits.fn := ex_reg_ctrl_fn_alu
div.io.req.bits.in1 := ex_rs1
div.io.req.bits.in2 := ex_rs2
div.io.req.bits.tag := ex_reg_waddr
div.io.kill := io.ctrl.div_kill
div.io.kill := io.ctrl.div_mul_kill
div.io.resp.ready := Bool(true)
io.ctrl.div_rdy := div.io.req.ready
io.ctrl.div_result_val := div.io.resp.valid
// multiplier
val mul = new Multiplier(unroll = 4, earlyOut = true)
mul.io.req.valid := io.ctrl.mul_val
mul.io.req.bits.dw := ex_reg_ctrl_fn_dw
mul.io.req.bits.fn := ex_reg_ctrl_fn_alu
mul.io.req.bits.in1 := ex_rs1
mul.io.req.bits.in2 := ex_rs2
mul.io.req.bits.tag := ex_reg_waddr
mul.io.kill := io.ctrl.mul_kill
mul.io.resp.ready := Bool(true)
io.ctrl.mul_rdy := mul.io.req.ready
io.ctrl.mul_result_val := mul.io.resp.valid
io.ctrl.div_mul_rdy := div.io.req.ready
io.fpu.fromint_data := ex_rs1
io.ctrl.ex_waddr := ex_reg_waddr
@ -266,17 +252,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu
val mem_ll_wdata = Bits()
mem_ll_wdata := mul.io.resp.bits.data
io.ctrl.mem_ll_waddr := mul.io.resp.bits.tag
io.ctrl.mem_ll_wb := mul.io.resp.valid
when (div.io.resp.valid) {
mul.io.resp.ready := Bool(false)
mem_ll_wdata := div.io.resp.bits.data
io.ctrl.mem_ll_waddr := div.io.resp.bits.tag
io.ctrl.mem_ll_wb := Bool(true)
}
mem_ll_wdata := div.io.resp.bits.data
io.ctrl.mem_ll_waddr := div.io.resp.bits.tag
io.ctrl.mem_ll_wb := div.io.resp.valid
when (dmem_resp_replay) {
mul.io.resp.ready := Bool(false)
div.io.resp.ready := Bool(false)
mem_ll_wdata := io.dmem.resp.bits.data_subword
io.ctrl.mem_ll_waddr := dmem_resp_waddr

View File

@ -9,17 +9,17 @@ object ALU
{
val SZ_ALU_FN = 4
val FN_X = Bits("b????")
val FN_ADD = UFix(0)
val FN_SL = UFix(1)
val FN_XOR = UFix(4)
val FN_OR = UFix(6)
val FN_AND = UFix(7)
val FN_SR = UFix(5)
val FN_SUB = UFix(8)
val FN_SLT = UFix(10)
val FN_SLTU = UFix(11)
val FN_SRA = UFix(13)
val FN_OP2 = UFix(15)
val FN_ADD = Bits(0)
val FN_SL = Bits(1)
val FN_XOR = Bits(4)
val FN_OR = Bits(6)
val FN_AND = Bits(7)
val FN_SR = Bits(5)
val FN_SUB = Bits(8)
val FN_SLT = Bits(10)
val FN_SLTU = Bits(11)
val FN_SRA = Bits(13)
val FN_OP2 = Bits(15)
val FN_DIV = FN_XOR
val FN_DIVU = FN_SR