clean up mul/div interface; use VU mul if HAVE_VEC
This commit is contained in:
parent
b3a3289d34
commit
4121fb178c
@ -37,9 +37,9 @@ object Constants
|
|||||||
|
|
||||||
val MUL_X = UFix(0, 2);
|
val MUL_X = UFix(0, 2);
|
||||||
val MUL_LO = UFix(0, 2);
|
val MUL_LO = UFix(0, 2);
|
||||||
val MUL_HU = UFix(1, 2);
|
val MUL_H = UFix(1, 2);
|
||||||
val MUL_HS = UFix(2, 2);
|
val MUL_HSU = UFix(2, 2);
|
||||||
val MUL_HSU = UFix(3, 2);
|
val MUL_HU = UFix(3, 2);
|
||||||
|
|
||||||
val DIV_X = UFix(0, 2);
|
val DIV_X = UFix(0, 2);
|
||||||
val DIV_D = UFix(0, 2);
|
val DIV_D = UFix(0, 2);
|
||||||
|
@ -173,6 +173,10 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
|
|||||||
vu.io.dmem_resp.bits.tag := dpath.io.ext_mem.resp_tag
|
vu.io.dmem_resp.bits.tag := dpath.io.ext_mem.resp_tag
|
||||||
vu.io.dmem_resp.bits.typ := dpath.io.ext_mem.resp_type
|
vu.io.dmem_resp.bits.typ := dpath.io.ext_mem.resp_type
|
||||||
|
|
||||||
|
// share vector integer multiplier with rocket
|
||||||
|
dpath.io.vec_imul_req <> vu.io.cp_imul_req
|
||||||
|
dpath.io.vec_imul_resp <> vu.io.cp_imul_resp
|
||||||
|
|
||||||
fpu.io.sfma.valid := Bool(false)
|
fpu.io.sfma.valid := Bool(false)
|
||||||
fpu.io.dfma.valid := Bool(false)
|
fpu.io.dfma.valid := Bool(false)
|
||||||
}
|
}
|
||||||
|
@ -183,7 +183,7 @@ class rocketCtrl extends Component
|
|||||||
SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
||||||
|
|
||||||
MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
||||||
MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_H, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
||||||
MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
||||||
MULHSU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
MULHSU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
||||||
MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
|
||||||
|
@ -1,28 +1,11 @@
|
|||||||
package Top {
|
package Top
|
||||||
|
|
||||||
import Chisel._
|
import Chisel._
|
||||||
import Node._;
|
import Node._
|
||||||
import Constants._;
|
import Constants._
|
||||||
|
|
||||||
class ioDivider(width: Int) extends Bundle {
|
class rocketDivider(width: Int) extends Component {
|
||||||
// requests
|
val io = new ioMultiplier
|
||||||
val div_val = Bool(INPUT);
|
|
||||||
val div_kill = Bool(INPUT);
|
|
||||||
val div_rdy = Bool(OUTPUT);
|
|
||||||
val dw = UFix(1, INPUT);
|
|
||||||
val div_fn = UFix(2, INPUT);
|
|
||||||
val div_tag = UFix(5, INPUT);
|
|
||||||
val in0 = Bits(width, INPUT);
|
|
||||||
val in1 = Bits(width, INPUT);
|
|
||||||
// responses
|
|
||||||
val result = Bits(width, OUTPUT);
|
|
||||||
val result_tag = UFix(5, OUTPUT);
|
|
||||||
val result_val = Bool(OUTPUT);
|
|
||||||
val result_rdy = Bool(INPUT);
|
|
||||||
}
|
|
||||||
|
|
||||||
class rocketDivider(width : Int) extends Component {
|
|
||||||
val io = new ioDivider(width);
|
|
||||||
|
|
||||||
val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() };
|
val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() };
|
||||||
val state = Reg(resetVal = s_ready);
|
val state = Reg(resetVal = s_ready);
|
||||||
@ -31,21 +14,23 @@ class rocketDivider(width : Int) extends Component {
|
|||||||
val divby0 = Reg() { Bool() };
|
val divby0 = Reg() { Bool() };
|
||||||
val neg_quo = Reg() { Bool() };
|
val neg_quo = Reg() { Bool() };
|
||||||
val neg_rem = Reg() { Bool() };
|
val neg_rem = Reg() { Bool() };
|
||||||
val reg_tag = Reg() { UFix() };
|
val reg_tag = Reg() { Bits() };
|
||||||
val rem = Reg() { Bool() };
|
val rem = Reg() { Bool() };
|
||||||
val half = Reg() { Bool() };
|
val half = Reg() { Bool() };
|
||||||
|
|
||||||
val divisor = Reg() { UFix() };
|
val divisor = Reg() { UFix() };
|
||||||
val remainder = Reg() { UFix() };
|
val remainder = Reg() { UFix() };
|
||||||
val subtractor = remainder(2*width, width).toUFix - divisor;
|
val subtractor = remainder(2*width, width).toUFix - divisor;
|
||||||
|
|
||||||
val tc = (io.div_fn === DIV_D) || (io.div_fn === DIV_R);
|
val dw = io.req.bits.fn(io.req.bits.fn.width-1)
|
||||||
|
val fn = io.req.bits.fn(io.req.bits.fn.width-2,0)
|
||||||
|
val tc = (fn === DIV_D) || (fn === DIV_R);
|
||||||
|
|
||||||
val do_kill = io.div_kill && Reg(io.div_rdy) // kill on 1st cycle only
|
val do_kill = io.req_kill && Reg(io.req.ready) // kill on 1st cycle only
|
||||||
|
|
||||||
switch (state) {
|
switch (state) {
|
||||||
is (s_ready) {
|
is (s_ready) {
|
||||||
when (io.div_val) {
|
when (io.req.valid) {
|
||||||
state := Mux(tc, s_neg_inputs, s_busy)
|
state := Mux(tc, s_neg_inputs, s_busy)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -64,7 +49,7 @@ class rocketDivider(width : Int) extends Component {
|
|||||||
state := s_done
|
state := s_done
|
||||||
}
|
}
|
||||||
is (s_done) {
|
is (s_done) {
|
||||||
when (io.result_rdy) {
|
when (io.resp_rdy) {
|
||||||
state := s_ready
|
state := s_ready
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -72,21 +57,21 @@ class rocketDivider(width : Int) extends Component {
|
|||||||
|
|
||||||
// state machine
|
// state machine
|
||||||
|
|
||||||
val lhs_sign = tc && Mux(io.dw === DW_64, io.in0(width-1), io.in0(width/2-1)).toBool
|
val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(width-1), io.req.bits.in0(width/2-1)).toBool
|
||||||
val lhs_hi = Mux(io.dw === DW_64, io.in0(width-1,width/2), Fill(width/2, lhs_sign))
|
val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(width-1,width/2), Fill(width/2, lhs_sign))
|
||||||
val lhs_in = Cat(lhs_hi, io.in0(width/2-1,0))
|
val lhs_in = Cat(lhs_hi, io.req.bits.in0(width/2-1,0))
|
||||||
|
|
||||||
val rhs_sign = tc && Mux(io.dw === DW_64, io.in1(width-1), io.in1(width/2-1)).toBool
|
val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(width-1), io.req.bits.in1(width/2-1)).toBool
|
||||||
val rhs_hi = Mux(io.dw === DW_64, io.in1(width-1,width/2), Fill(width/2, rhs_sign))
|
val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(width-1,width/2), Fill(width/2, rhs_sign))
|
||||||
val rhs_in = Cat(rhs_hi, io.in1(width/2-1,0))
|
val rhs_in = Cat(rhs_hi, io.req.bits.in1(width/2-1,0))
|
||||||
|
|
||||||
when ((state === s_ready) && io.div_val) {
|
when ((state === s_ready) && io.req.valid) {
|
||||||
count := UFix(0, log2up(width+1));
|
count := UFix(0, log2up(width+1));
|
||||||
half := (io.dw === DW_32);
|
half := (dw === DW_32);
|
||||||
neg_quo := Bool(false);
|
neg_quo := Bool(false);
|
||||||
neg_rem := Bool(false);
|
neg_rem := Bool(false);
|
||||||
rem := (io.div_fn === DIV_R) || (io.div_fn === DIV_RU);
|
rem := (fn === DIV_R) || (fn === DIV_RU);
|
||||||
reg_tag := io.div_tag;
|
reg_tag := io.req_tag;
|
||||||
divby0 := Bool(true);
|
divby0 := Bool(true);
|
||||||
divisor := rhs_in.toUFix;
|
divisor := rhs_in.toUFix;
|
||||||
remainder := Cat(UFix(0,width+1), lhs_in).toUFix;
|
remainder := Cat(UFix(0,width+1), lhs_in).toUFix;
|
||||||
@ -126,11 +111,9 @@ class rocketDivider(width : Int) extends Component {
|
|||||||
|
|
||||||
val result = Mux(rem, remainder(2*width, width+1), remainder(width-1,0));
|
val result = Mux(rem, remainder(2*width, width+1), remainder(width-1,0));
|
||||||
|
|
||||||
io.result := Mux(half, Cat(Fill(width/2, result(width/2-1)), result(width/2-1,0)), result);
|
io.resp_bits := Mux(half, Cat(Fill(width/2, result(width/2-1)), result(width/2-1,0)), result);
|
||||||
io.result_tag := reg_tag;
|
io.resp_tag := reg_tag;
|
||||||
io.result_val := (state === s_done);
|
io.resp_val := (state === s_done);
|
||||||
|
|
||||||
io.div_rdy := (state === s_ready);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
io.req.ready := (state === s_ready);
|
||||||
}
|
}
|
||||||
|
@ -40,6 +40,8 @@ class ioDpathAll extends Bundle()
|
|||||||
val fpu = new ioDpathFPU();
|
val fpu = new ioDpathFPU();
|
||||||
val vec_ctrl = new ioCtrlDpathVec().flip()
|
val vec_ctrl = new ioCtrlDpathVec().flip()
|
||||||
val vec_iface = new ioDpathVecInterface()
|
val vec_iface = new ioDpathVecInterface()
|
||||||
|
val vec_imul_req = new io_imul_req
|
||||||
|
val vec_imul_resp = Bits(hwacha.Config.DEF_XLEN, INPUT)
|
||||||
}
|
}
|
||||||
|
|
||||||
class rocketDpath extends Component
|
class rocketDpath extends Component
|
||||||
@ -56,16 +58,6 @@ class rocketDpath extends Component
|
|||||||
val alu = new rocketDpathALU();
|
val alu = new rocketDpathALU();
|
||||||
val ex_alu_out = alu.io.out;
|
val ex_alu_out = alu.io.out;
|
||||||
val ex_alu_adder_out = alu.io.adder_out;
|
val ex_alu_adder_out = alu.io.adder_out;
|
||||||
|
|
||||||
val div = new rocketDivider(64);
|
|
||||||
val div_result = div.io.result;
|
|
||||||
val div_result_tag = div.io.result_tag;
|
|
||||||
val div_result_val = div.io.result_val;
|
|
||||||
|
|
||||||
val mul = new rocketMultiplier();
|
|
||||||
val mul_result = mul.io.result;
|
|
||||||
val mul_result_tag = mul.io.result_tag;
|
|
||||||
val mul_result_val = mul.io.result_val;
|
|
||||||
|
|
||||||
val rfile = new rocketDpathRegfile();
|
val rfile = new rocketDpathRegfile();
|
||||||
|
|
||||||
@ -251,34 +243,39 @@ class rocketDpath extends Component
|
|||||||
alu.io.fn := ex_reg_ctrl_fn_alu;
|
alu.io.fn := ex_reg_ctrl_fn_alu;
|
||||||
alu.io.in2 := ex_reg_op2.toUFix;
|
alu.io.in2 := ex_reg_op2.toUFix;
|
||||||
alu.io.in1 := ex_reg_rs1.toUFix;
|
alu.io.in1 := ex_reg_rs1.toUFix;
|
||||||
|
|
||||||
// divider
|
|
||||||
div.io.dw := ex_reg_ctrl_fn_dw;
|
|
||||||
div.io.div_fn := ex_reg_ctrl_div_fn;
|
|
||||||
div.io.div_val := ex_reg_ctrl_div_val;
|
|
||||||
div.io.div_kill := io.ctrl.killm;
|
|
||||||
div.io.div_tag := ex_reg_waddr;
|
|
||||||
div.io.in0 := ex_reg_rs1;
|
|
||||||
div.io.in1 := ex_reg_rs2;
|
|
||||||
div.io.result_rdy:= !dmem_resp_replay
|
|
||||||
|
|
||||||
io.ctrl.div_rdy := div.io.div_rdy;
|
|
||||||
io.ctrl.div_result_val := div.io.result_val;
|
|
||||||
|
|
||||||
// multiplier
|
|
||||||
mul.io.mul_val := ex_reg_ctrl_mul_val;
|
|
||||||
mul.io.mul_kill:= io.ctrl.killm;
|
|
||||||
mul.io.dw := ex_reg_ctrl_fn_dw;
|
|
||||||
mul.io.mul_fn := ex_reg_ctrl_mul_fn;
|
|
||||||
mul.io.mul_tag := ex_reg_waddr;
|
|
||||||
mul.io.in0 := ex_reg_rs1;
|
|
||||||
mul.io.in1 := ex_reg_rs2;
|
|
||||||
|
|
||||||
io.fpu.fromint_data := ex_reg_rs1
|
io.fpu.fromint_data := ex_reg_rs1
|
||||||
|
|
||||||
io.ctrl.mul_rdy := mul.io.mul_rdy
|
// divider
|
||||||
io.ctrl.mul_result_val := mul.io.result_val;
|
val div = new rocketDivider(64)
|
||||||
mul.io.result_rdy := !dmem_resp_replay && !div.io.result_val
|
div.io.req.valid := ex_reg_ctrl_div_val
|
||||||
|
div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_div_fn)
|
||||||
|
div.io.req.bits.in0 := ex_reg_rs1
|
||||||
|
div.io.req.bits.in1 := ex_reg_rs2
|
||||||
|
div.io.req_tag := ex_reg_waddr
|
||||||
|
div.io.req_kill := io.ctrl.killm
|
||||||
|
div.io.resp_rdy := !dmem_resp_replay
|
||||||
|
io.ctrl.div_rdy := div.io.req.ready
|
||||||
|
io.ctrl.div_result_val := div.io.resp_val
|
||||||
|
|
||||||
|
// multiplier
|
||||||
|
var mul_io = new rocketMultiplier().io
|
||||||
|
if (HAVE_VEC)
|
||||||
|
{
|
||||||
|
val vu_mul = new rocketVUMultiplier(nwbq = 1)
|
||||||
|
vu_mul.io.vu.req <> io.vec_imul_req
|
||||||
|
vu_mul.io.vu.resp <> io.vec_imul_resp
|
||||||
|
mul_io = vu_mul.io.cpu
|
||||||
|
}
|
||||||
|
mul_io.req.valid := ex_reg_ctrl_mul_val;
|
||||||
|
mul_io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_mul_fn)
|
||||||
|
mul_io.req.bits.in0 := ex_reg_rs1
|
||||||
|
mul_io.req.bits.in1 := ex_reg_rs2
|
||||||
|
mul_io.req_tag := ex_reg_waddr
|
||||||
|
mul_io.req_kill := io.ctrl.killm
|
||||||
|
mul_io.resp_rdy := !dmem_resp_replay && !div.io.resp_val
|
||||||
|
io.ctrl.mul_rdy := mul_io.req.ready
|
||||||
|
io.ctrl.mul_result_val := mul_io.resp_val
|
||||||
|
|
||||||
io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection & bypass control
|
io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection & bypass control
|
||||||
|
|
||||||
@ -358,14 +355,14 @@ class rocketDpath extends Component
|
|||||||
r_dmem_fp_replay := io.dmem.resp_replay && dmem_resp_fpu;
|
r_dmem_fp_replay := io.dmem.resp_replay && dmem_resp_fpu;
|
||||||
|
|
||||||
val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr,
|
val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr,
|
||||||
Mux(div_result_val, div_result_tag,
|
Mux(div.io.resp_val, div.io.resp_tag,
|
||||||
Mux(mul_result_val, mul_result_tag,
|
Mux(mul_io.resp_val, mul_io.resp_tag,
|
||||||
mem_reg_waddr)))
|
mem_reg_waddr)))
|
||||||
val mem_ll_wdata = Mux(div_result_val, div_result,
|
val mem_ll_wdata = Mux(div.io.resp_val, div.io.resp_bits,
|
||||||
Mux(mul_result_val, mul_result,
|
Mux(mul_io.resp_val, mul_io.resp_bits,
|
||||||
Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data,
|
Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data,
|
||||||
mem_reg_wdata)))
|
mem_reg_wdata)))
|
||||||
val mem_ll_wb = dmem_resp_replay || div_result_val || mul_result_val
|
val mem_ll_wb = dmem_resp_replay || div.io.resp_val || mul_io.resp_val
|
||||||
|
|
||||||
io.fpu.dmem_resp_val := io.dmem.resp_val && dmem_resp_fpu
|
io.fpu.dmem_resp_val := io.dmem.resp_val && dmem_resp_fpu
|
||||||
io.fpu.dmem_resp_data := io.dmem.resp_data
|
io.fpu.dmem_resp_data := io.dmem.resp_data
|
||||||
|
@ -1,75 +1,118 @@
|
|||||||
package Top {
|
package Top
|
||||||
|
|
||||||
import Chisel._
|
import Chisel._
|
||||||
import Node._;
|
import Node._
|
||||||
import Constants._;
|
import Constants._
|
||||||
|
import hwacha._
|
||||||
|
import hwacha.Config._
|
||||||
|
|
||||||
class ioMultiplier(width: Int) extends Bundle {
|
class ioMultiplier extends Bundle {
|
||||||
// requests
|
val req = new io_imul_req().flip()
|
||||||
val mul_val = Bool(INPUT);
|
val req_tag = Bits(5, INPUT)
|
||||||
val mul_kill= Bool(INPUT);
|
val req_kill = Bool(INPUT)
|
||||||
val mul_rdy = Bool(OUTPUT);
|
val resp_val = Bool(OUTPUT)
|
||||||
val dw = UFix(1, INPUT);
|
val resp_rdy = Bool(INPUT)
|
||||||
val mul_fn = UFix(2, INPUT);
|
val resp_tag = Bits(5, OUTPUT)
|
||||||
val mul_tag = UFix(CPU_TAG_BITS, INPUT);
|
val resp_bits = Bits(DEF_XLEN, OUTPUT)
|
||||||
val in0 = Bits(width, INPUT);
|
}
|
||||||
val in1 = Bits(width, INPUT);
|
|
||||||
|
class rocketVUMultiplier(nwbq: Int) extends Component {
|
||||||
// responses
|
val io = new Bundle {
|
||||||
val result = Bits(width, OUTPUT);
|
val cpu = new ioMultiplier
|
||||||
val result_tag = UFix(CPU_TAG_BITS, OUTPUT);
|
val vu = new Bundle {
|
||||||
val result_val = Bool(OUTPUT);
|
val req = new io_imul_req
|
||||||
val result_rdy = Bool(INPUT);
|
val resp = Bits(DEF_XLEN, INPUT)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
val valid = Reg(resetVal = Bits(0, IMUL_STAGES))
|
||||||
|
val wbq_cnt = Reg(resetVal = Bits(0, log2up(nwbq+1)))
|
||||||
|
val tag = Vec(IMUL_STAGES) { Reg() { Bits() } }
|
||||||
|
|
||||||
|
val fire = io.cpu.req.valid && io.cpu.req.ready
|
||||||
|
|
||||||
|
valid := Cat(fire, valid(IMUL_STAGES-1) && !io.cpu.req_kill, valid(IMUL_STAGES-2,1))
|
||||||
|
when (fire) {
|
||||||
|
tag(IMUL_STAGES-1) := io.cpu.req_tag
|
||||||
|
}
|
||||||
|
for (i <- 0 until IMUL_STAGES-1) {
|
||||||
|
tag(i) := tag(i+1)
|
||||||
|
}
|
||||||
|
when (valid(0) != (io.cpu.resp_val && io.cpu.resp_rdy)) {
|
||||||
|
wbq_cnt := Mux(valid(0), wbq_cnt + UFix(1), wbq_cnt - UFix(1))
|
||||||
|
}
|
||||||
|
|
||||||
|
var inflight_cnt = valid(0)
|
||||||
|
for (i <- 1 until IMUL_STAGES)
|
||||||
|
inflight_cnt = inflight_cnt + valid(i)
|
||||||
|
inflight_cnt = inflight_cnt + wbq_cnt
|
||||||
|
val wbq_rdy = inflight_cnt < UFix(nwbq)
|
||||||
|
|
||||||
|
val wbq = (new queue(nwbq)) { Bits(width = io.cpu.resp_bits.width + io.cpu.resp_tag.width) }
|
||||||
|
wbq.io.enq.valid := valid(0)
|
||||||
|
wbq.io.enq.bits := Cat(io.vu.resp, tag(0))
|
||||||
|
wbq.io.deq.ready := io.cpu.resp_rdy
|
||||||
|
|
||||||
|
io.cpu.req.ready := io.vu.req.ready && wbq_rdy
|
||||||
|
io.cpu.resp_val := wbq.io.deq.valid
|
||||||
|
io.cpu.resp_bits := wbq.io.deq.bits >> UFix(io.cpu.resp_tag.width)
|
||||||
|
io.cpu.resp_tag := wbq.io.deq.bits(io.cpu.resp_tag.width-1,0)
|
||||||
|
|
||||||
|
io.vu.req <> io.cpu.req
|
||||||
}
|
}
|
||||||
|
|
||||||
class rocketMultiplier extends Component {
|
class rocketMultiplier extends Component {
|
||||||
val io = new ioMultiplier(64);
|
val io = new ioMultiplier
|
||||||
// width must be even (booth).
|
// w must be even (booth).
|
||||||
// we need an extra bit to handle signed vs. unsigned,
|
// we need an extra bit to handle signed vs. unsigned,
|
||||||
// so we need to add a second to keep width even.
|
// so we need to add a second to keep w even.
|
||||||
val width = 64 + 2
|
val w = 64 + 2
|
||||||
// unroll must divide width/2
|
|
||||||
val unroll = 3
|
val unroll = 3
|
||||||
|
|
||||||
val cycles = width/unroll/2
|
require(w % 2 == 0 && (w/2) % unroll == 0)
|
||||||
|
|
||||||
|
val cycles = w/unroll/2
|
||||||
|
|
||||||
val r_val = Reg(resetVal = Bool(false));
|
val r_val = Reg(resetVal = Bool(false));
|
||||||
val r_dw = Reg { UFix() }
|
val r_dw = Reg { Bits() }
|
||||||
val r_fn = Reg { UFix() }
|
val r_fn = Reg { Bits() }
|
||||||
val r_tag = Reg { UFix() }
|
val r_tag = Reg { Bits() }
|
||||||
val r_lhs = Reg { Bits() }
|
val r_lhs = Reg { Bits() }
|
||||||
val r_prod= Reg { Bits(width = width*2) }
|
val r_prod= Reg { Bits(width = w*2) }
|
||||||
val r_lsb = Reg { Bits() }
|
val r_lsb = Reg { Bits() }
|
||||||
val r_cnt = Reg { UFix(width = log2up(cycles+1)) }
|
val r_cnt = Reg { UFix(width = log2up(cycles+1)) }
|
||||||
|
|
||||||
val lhs_msb = Mux(io.dw === DW_64, io.in0(63), io.in0(31)).toBool
|
val dw = io.req.bits.fn(io.req.bits.fn.width-1)
|
||||||
val lhs_sign = ((io.mul_fn === MUL_HS) || (io.mul_fn === MUL_HSU)) && lhs_msb
|
val fn = io.req.bits.fn(io.req.bits.fn.width-2,0)
|
||||||
val lhs_hi = Mux(io.dw === DW_64, io.in0(63,32), Fill(32, lhs_sign))
|
|
||||||
val lhs_in = Cat(lhs_sign, lhs_hi, io.in0(31,0))
|
|
||||||
|
|
||||||
val rhs_msb = Mux(io.dw === DW_64, io.in1(63), io.in1(31)).toBool
|
val lhs_msb = Mux(dw === DW_64, io.req.bits.in0(63), io.req.bits.in0(31)).toBool
|
||||||
val rhs_sign = (io.mul_fn === MUL_HS) && rhs_msb
|
val lhs_sign = ((fn === MUL_H) || (fn === MUL_HSU)) && lhs_msb
|
||||||
val rhs_hi = Mux(io.dw === DW_64, io.in1(63,32), Fill(32, rhs_sign))
|
val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(63,32), Fill(32, lhs_sign))
|
||||||
val rhs_in = Cat(rhs_sign, rhs_sign, rhs_hi, io.in1(31,0))
|
val lhs_in = Cat(lhs_sign, lhs_hi, io.req.bits.in0(31,0))
|
||||||
|
|
||||||
val do_kill = io.mul_kill && r_cnt === UFix(0) // can only kill on 1st cycle
|
val rhs_msb = Mux(dw === DW_64, io.req.bits.in1(63), io.req.bits.in1(31)).toBool
|
||||||
|
val rhs_sign = (fn === MUL_H) && rhs_msb
|
||||||
|
val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(63,32), Fill(32, rhs_sign))
|
||||||
|
val rhs_in = Cat(rhs_sign, rhs_sign, rhs_hi, io.req.bits.in1(31,0))
|
||||||
|
|
||||||
|
val do_kill = io.req_kill && r_cnt === UFix(0) // can only kill on 1st cycle
|
||||||
|
|
||||||
when (io.mul_val && io.mul_rdy) {
|
when (io.req.valid && io.req.ready) {
|
||||||
r_val := Bool(true)
|
r_val := Bool(true)
|
||||||
r_cnt := UFix(0, log2up(cycles+1))
|
r_cnt := UFix(0, log2up(cycles+1))
|
||||||
r_dw := io.dw
|
r_dw := dw
|
||||||
r_fn := io.mul_fn
|
r_fn := fn
|
||||||
r_tag := io.mul_tag
|
r_tag := io.req_tag
|
||||||
r_lhs := lhs_in
|
r_lhs := lhs_in
|
||||||
r_prod:= rhs_in
|
r_prod:= rhs_in
|
||||||
r_lsb := Bool(false)
|
r_lsb := Bool(false)
|
||||||
}
|
}
|
||||||
.elsewhen (io.result_val && io.result_rdy || do_kill) { // can only kill on first cycle
|
.elsewhen (io.resp_val && io.resp_rdy || do_kill) { // can only kill on first cycle
|
||||||
r_val := Bool(false)
|
r_val := Bool(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
val lhs_sext = Cat(r_lhs(width-2), r_lhs(width-2), r_lhs).toUFix
|
val lhs_sext = Cat(r_lhs(w-2), r_lhs(w-2), r_lhs).toUFix
|
||||||
val lhs_twice = Cat(r_lhs(width-2), r_lhs, Bits(0,1)).toUFix
|
val lhs_twice = Cat(r_lhs(w-2), r_lhs, Bits(0,1)).toUFix
|
||||||
|
|
||||||
var prod = r_prod
|
var prod = r_prod
|
||||||
var lsb = r_lsb
|
var lsb = r_lsb
|
||||||
@ -79,12 +122,12 @@ class rocketMultiplier extends Component {
|
|||||||
Mux(prod(0) != prod(1), lhs_twice,
|
Mux(prod(0) != prod(1), lhs_twice,
|
||||||
UFix(0)));
|
UFix(0)));
|
||||||
val sub = prod(1)
|
val sub = prod(1)
|
||||||
val adder_lhs = Cat(prod(width*2-1), prod(width*2-1,width)).toUFix
|
val adder_lhs = Cat(prod(w*2-1), prod(w*2-1,w)).toUFix
|
||||||
val adder_rhs = Mux(sub, ~addend, addend)
|
val adder_rhs = Mux(sub, ~addend, addend)
|
||||||
val adder_out = (adder_lhs + adder_rhs + sub.toUFix)(width,0)
|
val adder_out = (adder_lhs + adder_rhs + sub.toUFix)(w,0)
|
||||||
|
|
||||||
lsb = prod(1)
|
lsb = prod(1)
|
||||||
prod = Cat(adder_out(width), adder_out, prod(width-1,2))
|
prod = Cat(adder_out(w), adder_out, prod(w-1,2))
|
||||||
}
|
}
|
||||||
|
|
||||||
when (r_val && (r_cnt != UFix(cycles))) {
|
when (r_val && (r_cnt != UFix(cycles))) {
|
||||||
@ -99,10 +142,8 @@ class rocketMultiplier extends Component {
|
|||||||
|
|
||||||
val mul_output = Mux(r_dw === DW_64, mul_output64, mul_output32_ext)
|
val mul_output = Mux(r_dw === DW_64, mul_output64, mul_output32_ext)
|
||||||
|
|
||||||
io.mul_rdy := !r_val
|
io.req.ready := !r_val
|
||||||
io.result := mul_output;
|
io.resp_bits := mul_output;
|
||||||
io.result_tag := r_tag;
|
io.resp_tag := r_tag;
|
||||||
io.result_val := r_val && (r_cnt === UFix(cycles))
|
io.resp_val := r_val && (r_cnt === UFix(cycles))
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -14,32 +14,42 @@ class queue[T <: Data](entries: Int, flushable: Boolean = false)(data: => T) ext
|
|||||||
{
|
{
|
||||||
val io = new ioQueue(flushable)(data)
|
val io = new ioQueue(flushable)(data)
|
||||||
|
|
||||||
val enq_ptr = Reg(resetVal = UFix(0, log2up(entries)))
|
|
||||||
val deq_ptr = Reg(resetVal = UFix(0, log2up(entries)))
|
|
||||||
val maybe_full = Reg(resetVal = Bool(false))
|
|
||||||
|
|
||||||
io.deq.valid := maybe_full || enq_ptr != deq_ptr
|
|
||||||
io.enq.ready := !maybe_full || enq_ptr != deq_ptr
|
|
||||||
|
|
||||||
val do_enq = io.enq.ready && io.enq.valid
|
val do_enq = io.enq.ready && io.enq.valid
|
||||||
val do_deq = io.deq.ready && io.deq.valid
|
val do_deq = io.deq.ready && io.deq.valid
|
||||||
|
|
||||||
when (do_deq) {
|
var enq_ptr = UFix(0)
|
||||||
deq_ptr := deq_ptr + UFix(1)
|
var deq_ptr = UFix(0)
|
||||||
}
|
|
||||||
when (do_enq) {
|
if (entries > 1)
|
||||||
enq_ptr := enq_ptr + UFix(1)
|
{
|
||||||
|
enq_ptr = Reg(resetVal = UFix(0, log2up(entries)))
|
||||||
|
deq_ptr = Reg(resetVal = UFix(0, log2up(entries)))
|
||||||
|
|
||||||
|
when (do_deq) {
|
||||||
|
deq_ptr := deq_ptr + UFix(1)
|
||||||
|
}
|
||||||
|
when (do_enq) {
|
||||||
|
enq_ptr := enq_ptr + UFix(1)
|
||||||
|
}
|
||||||
|
if (flushable) {
|
||||||
|
when (io.flush) {
|
||||||
|
deq_ptr := UFix(0)
|
||||||
|
enq_ptr := UFix(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
val maybe_full = Reg(resetVal = Bool(false))
|
||||||
when (do_enq != do_deq) {
|
when (do_enq != do_deq) {
|
||||||
maybe_full := do_enq
|
maybe_full := do_enq
|
||||||
}
|
}
|
||||||
if (flushable) {
|
if (flushable) {
|
||||||
when (io.flush) {
|
when (io.flush) {
|
||||||
deq_ptr := UFix(0)
|
|
||||||
enq_ptr := UFix(0)
|
|
||||||
maybe_full := Bool(false)
|
maybe_full := Bool(false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
io.deq.valid := maybe_full || enq_ptr != deq_ptr
|
||||||
|
io.enq.ready := !maybe_full || enq_ptr != deq_ptr
|
||||||
io.deq.bits <> Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr)
|
io.deq.bits <> Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user