1
0

clean up mul/div interface; use VU mul if HAVE_VEC

This commit is contained in:
Andrew Waterman 2012-02-24 19:22:35 -08:00
parent b3a3289d34
commit 4121fb178c
7 changed files with 192 additions and 157 deletions

View File

@ -37,9 +37,9 @@ object Constants
val MUL_X = UFix(0, 2);
val MUL_LO = UFix(0, 2);
val MUL_HU = UFix(1, 2);
val MUL_HS = UFix(2, 2);
val MUL_HSU = UFix(3, 2);
val MUL_H = UFix(1, 2);
val MUL_HSU = UFix(2, 2);
val MUL_HU = UFix(3, 2);
val DIV_X = UFix(0, 2);
val DIV_D = UFix(0, 2);

View File

@ -173,6 +173,10 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal)
vu.io.dmem_resp.bits.tag := dpath.io.ext_mem.resp_tag
vu.io.dmem_resp.bits.typ := dpath.io.ext_mem.resp_type
// share vector integer multiplier with rocket
dpath.io.vec_imul_req <> vu.io.cp_imul_req
dpath.io.vec_imul_resp <> vu.io.cp_imul_resp
fpu.io.sfma.valid := Bool(false)
fpu.io.dfma.valid := Bool(false)
}

View File

@ -183,7 +183,7 @@ class rocketCtrl extends Component
SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_H, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
MULHSU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),
MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),

View File

@ -1,28 +1,11 @@
package Top {
package Top
import Chisel._
import Node._;
import Constants._;
import Node._
import Constants._
class ioDivider(width: Int) extends Bundle {
// requests
val div_val = Bool(INPUT);
val div_kill = Bool(INPUT);
val div_rdy = Bool(OUTPUT);
val dw = UFix(1, INPUT);
val div_fn = UFix(2, INPUT);
val div_tag = UFix(5, INPUT);
val in0 = Bits(width, INPUT);
val in1 = Bits(width, INPUT);
// responses
val result = Bits(width, OUTPUT);
val result_tag = UFix(5, OUTPUT);
val result_val = Bool(OUTPUT);
val result_rdy = Bool(INPUT);
}
class rocketDivider(width : Int) extends Component {
val io = new ioDivider(width);
class rocketDivider(width: Int) extends Component {
val io = new ioMultiplier
val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() };
val state = Reg(resetVal = s_ready);
@ -31,7 +14,7 @@ class rocketDivider(width : Int) extends Component {
val divby0 = Reg() { Bool() };
val neg_quo = Reg() { Bool() };
val neg_rem = Reg() { Bool() };
val reg_tag = Reg() { UFix() };
val reg_tag = Reg() { Bits() };
val rem = Reg() { Bool() };
val half = Reg() { Bool() };
@ -39,13 +22,15 @@ class rocketDivider(width : Int) extends Component {
val remainder = Reg() { UFix() };
val subtractor = remainder(2*width, width).toUFix - divisor;
val tc = (io.div_fn === DIV_D) || (io.div_fn === DIV_R);
val dw = io.req.bits.fn(io.req.bits.fn.width-1)
val fn = io.req.bits.fn(io.req.bits.fn.width-2,0)
val tc = (fn === DIV_D) || (fn === DIV_R);
val do_kill = io.div_kill && Reg(io.div_rdy) // kill on 1st cycle only
val do_kill = io.req_kill && Reg(io.req.ready) // kill on 1st cycle only
switch (state) {
is (s_ready) {
when (io.div_val) {
when (io.req.valid) {
state := Mux(tc, s_neg_inputs, s_busy)
}
}
@ -64,7 +49,7 @@ class rocketDivider(width : Int) extends Component {
state := s_done
}
is (s_done) {
when (io.result_rdy) {
when (io.resp_rdy) {
state := s_ready
}
}
@ -72,21 +57,21 @@ class rocketDivider(width : Int) extends Component {
// state machine
val lhs_sign = tc && Mux(io.dw === DW_64, io.in0(width-1), io.in0(width/2-1)).toBool
val lhs_hi = Mux(io.dw === DW_64, io.in0(width-1,width/2), Fill(width/2, lhs_sign))
val lhs_in = Cat(lhs_hi, io.in0(width/2-1,0))
val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(width-1), io.req.bits.in0(width/2-1)).toBool
val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(width-1,width/2), Fill(width/2, lhs_sign))
val lhs_in = Cat(lhs_hi, io.req.bits.in0(width/2-1,0))
val rhs_sign = tc && Mux(io.dw === DW_64, io.in1(width-1), io.in1(width/2-1)).toBool
val rhs_hi = Mux(io.dw === DW_64, io.in1(width-1,width/2), Fill(width/2, rhs_sign))
val rhs_in = Cat(rhs_hi, io.in1(width/2-1,0))
val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(width-1), io.req.bits.in1(width/2-1)).toBool
val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(width-1,width/2), Fill(width/2, rhs_sign))
val rhs_in = Cat(rhs_hi, io.req.bits.in1(width/2-1,0))
when ((state === s_ready) && io.div_val) {
when ((state === s_ready) && io.req.valid) {
count := UFix(0, log2up(width+1));
half := (io.dw === DW_32);
half := (dw === DW_32);
neg_quo := Bool(false);
neg_rem := Bool(false);
rem := (io.div_fn === DIV_R) || (io.div_fn === DIV_RU);
reg_tag := io.div_tag;
rem := (fn === DIV_R) || (fn === DIV_RU);
reg_tag := io.req_tag;
divby0 := Bool(true);
divisor := rhs_in.toUFix;
remainder := Cat(UFix(0,width+1), lhs_in).toUFix;
@ -126,11 +111,9 @@ class rocketDivider(width : Int) extends Component {
val result = Mux(rem, remainder(2*width, width+1), remainder(width-1,0));
io.result := Mux(half, Cat(Fill(width/2, result(width/2-1)), result(width/2-1,0)), result);
io.result_tag := reg_tag;
io.result_val := (state === s_done);
io.div_rdy := (state === s_ready);
}
io.resp_bits := Mux(half, Cat(Fill(width/2, result(width/2-1)), result(width/2-1,0)), result);
io.resp_tag := reg_tag;
io.resp_val := (state === s_done);
io.req.ready := (state === s_ready);
}

View File

@ -40,6 +40,8 @@ class ioDpathAll extends Bundle()
val fpu = new ioDpathFPU();
val vec_ctrl = new ioCtrlDpathVec().flip()
val vec_iface = new ioDpathVecInterface()
val vec_imul_req = new io_imul_req
val vec_imul_resp = Bits(hwacha.Config.DEF_XLEN, INPUT)
}
class rocketDpath extends Component
@ -57,16 +59,6 @@ class rocketDpath extends Component
val ex_alu_out = alu.io.out;
val ex_alu_adder_out = alu.io.adder_out;
val div = new rocketDivider(64);
val div_result = div.io.result;
val div_result_tag = div.io.result_tag;
val div_result_val = div.io.result_val;
val mul = new rocketMultiplier();
val mul_result = mul.io.result;
val mul_result_tag = mul.io.result_tag;
val mul_result_val = mul.io.result_val;
val rfile = new rocketDpathRegfile();
// instruction fetch definitions
@ -252,33 +244,38 @@ class rocketDpath extends Component
alu.io.in2 := ex_reg_op2.toUFix;
alu.io.in1 := ex_reg_rs1.toUFix;
// divider
div.io.dw := ex_reg_ctrl_fn_dw;
div.io.div_fn := ex_reg_ctrl_div_fn;
div.io.div_val := ex_reg_ctrl_div_val;
div.io.div_kill := io.ctrl.killm;
div.io.div_tag := ex_reg_waddr;
div.io.in0 := ex_reg_rs1;
div.io.in1 := ex_reg_rs2;
div.io.result_rdy:= !dmem_resp_replay
io.ctrl.div_rdy := div.io.div_rdy;
io.ctrl.div_result_val := div.io.result_val;
// multiplier
mul.io.mul_val := ex_reg_ctrl_mul_val;
mul.io.mul_kill:= io.ctrl.killm;
mul.io.dw := ex_reg_ctrl_fn_dw;
mul.io.mul_fn := ex_reg_ctrl_mul_fn;
mul.io.mul_tag := ex_reg_waddr;
mul.io.in0 := ex_reg_rs1;
mul.io.in1 := ex_reg_rs2;
io.fpu.fromint_data := ex_reg_rs1
io.ctrl.mul_rdy := mul.io.mul_rdy
io.ctrl.mul_result_val := mul.io.result_val;
mul.io.result_rdy := !dmem_resp_replay && !div.io.result_val
// divider
val div = new rocketDivider(64)
div.io.req.valid := ex_reg_ctrl_div_val
div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_div_fn)
div.io.req.bits.in0 := ex_reg_rs1
div.io.req.bits.in1 := ex_reg_rs2
div.io.req_tag := ex_reg_waddr
div.io.req_kill := io.ctrl.killm
div.io.resp_rdy := !dmem_resp_replay
io.ctrl.div_rdy := div.io.req.ready
io.ctrl.div_result_val := div.io.resp_val
// multiplier
var mul_io = new rocketMultiplier().io
if (HAVE_VEC)
{
val vu_mul = new rocketVUMultiplier(nwbq = 1)
vu_mul.io.vu.req <> io.vec_imul_req
vu_mul.io.vu.resp <> io.vec_imul_resp
mul_io = vu_mul.io.cpu
}
mul_io.req.valid := ex_reg_ctrl_mul_val;
mul_io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_mul_fn)
mul_io.req.bits.in0 := ex_reg_rs1
mul_io.req.bits.in1 := ex_reg_rs2
mul_io.req_tag := ex_reg_waddr
mul_io.req_kill := io.ctrl.killm
mul_io.resp_rdy := !dmem_resp_replay && !div.io.resp_val
io.ctrl.mul_rdy := mul_io.req.ready
io.ctrl.mul_result_val := mul_io.resp_val
io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection & bypass control
@ -358,14 +355,14 @@ class rocketDpath extends Component
r_dmem_fp_replay := io.dmem.resp_replay && dmem_resp_fpu;
val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr,
Mux(div_result_val, div_result_tag,
Mux(mul_result_val, mul_result_tag,
Mux(div.io.resp_val, div.io.resp_tag,
Mux(mul_io.resp_val, mul_io.resp_tag,
mem_reg_waddr)))
val mem_ll_wdata = Mux(div_result_val, div_result,
Mux(mul_result_val, mul_result,
val mem_ll_wdata = Mux(div.io.resp_val, div.io.resp_bits,
Mux(mul_io.resp_val, mul_io.resp_bits,
Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data,
mem_reg_wdata)))
val mem_ll_wb = dmem_resp_replay || div_result_val || mul_result_val
val mem_ll_wb = dmem_resp_replay || div.io.resp_val || mul_io.resp_val
io.fpu.dmem_resp_val := io.dmem.resp_val && dmem_resp_fpu
io.fpu.dmem_resp_data := io.dmem.resp_data

View File

@ -1,75 +1,118 @@
package Top {
package Top
import Chisel._
import Node._;
import Constants._;
import Node._
import Constants._
import hwacha._
import hwacha.Config._
class ioMultiplier(width: Int) extends Bundle {
// requests
val mul_val = Bool(INPUT);
val mul_kill= Bool(INPUT);
val mul_rdy = Bool(OUTPUT);
val dw = UFix(1, INPUT);
val mul_fn = UFix(2, INPUT);
val mul_tag = UFix(CPU_TAG_BITS, INPUT);
val in0 = Bits(width, INPUT);
val in1 = Bits(width, INPUT);
// Request/response port bundle for an integer multiply/divide unit, declared
// from the unit's point of view. In this file it is used as the io of
// rocketDivider and rocketMultiplier and as the CPU-facing side of
// rocketVUMultiplier.
class ioMultiplier extends Bundle {
// Request channel (a flipped io_imul_req). Users in this file access
// req.valid, req.ready and req.bits.{fn, in0, in1}; they treat the top bit of
// req.bits.fn as the operand-width select (dw) and the remaining low bits as
// the operation code.
val req = new io_imul_req().flip()
// 5-bit tag supplied with the request; the units register it and present it
// again on resp_tag alongside the result.
val req_tag = Bits(5, INPUT)
// Cancels a request; the divider and the iterative multiplier only honor it
// on the first cycle after the request was accepted.
val req_kill = Bool(INPUT)
// Result is available on resp_bits/resp_tag.
val resp_val = Bool(OUTPUT)
// Consumer is able to accept the result this cycle.
val resp_rdy = Bool(INPUT)
// Tag of the request that produced the current result.
val resp_tag = Bits(5, OUTPUT)
// Result data, DEF_XLEN bits wide.
val resp_bits = Bits(DEF_XLEN, OUTPUT)
}
// responses
val result = Bits(width, OUTPUT);
val result_tag = UFix(CPU_TAG_BITS, OUTPUT);
val result_val = Bool(OUTPUT);
val result_rdy = Bool(INPUT);
// Adapter that presents the CPU-side ioMultiplier interface while forwarding
// the actual multiply to an external multiplier reached through io.vu (the
// shared vector-unit integer multiplier). It tracks requests in flight with a
// valid/tag shift register and buffers results in a small writeback queue.
//
// nwbq: number of entries in the writeback queue, and the bound used to decide
//       whether a new request may be accepted.
class rocketVUMultiplier(nwbq: Int) extends Component {
val io = new Bundle {
// CPU-facing request/response ports.
val cpu = new ioMultiplier
// Multiplier-facing ports: request out, raw result data in.
val vu = new Bundle {
val req = new io_imul_req
val resp = Bits(DEF_XLEN, INPUT)
}
}

// One valid bit per stage; bit IMUL_STAGES-1 is the newest request and bit 0
// marks the cycle in which io.vu.resp is captured into the writeback queue.
// presumably IMUL_STAGES matches the external multiplier's latency — TODO confirm
val valid = Reg(resetVal = Bits(0, IMUL_STAGES))
// Up/down counter adjusted by enqueues (valid(0)) and CPU-side dequeues.
val wbq_cnt = Reg(resetVal = Bits(0, log2up(nwbq+1)))
// Request tags, shifted in step with the valid bits.
val tag = Vec(IMUL_STAGES) { Reg() { Bits() } }

// A request is accepted this cycle.
val fire = io.cpu.req.valid && io.cpu.req.ready

// Shift toward bit 0 every cycle. req_kill clears the entry currently in the
// top stage as it moves down, i.e. a request can only be killed in the cycle
// right after it was accepted.
valid := Cat(fire, valid(IMUL_STAGES-1) && !io.cpu.req_kill, valid(IMUL_STAGES-2,1))
when (fire) {
tag(IMUL_STAGES-1) := io.cpu.req_tag
}
for (i <- 0 until IMUL_STAGES-1) {
tag(i) := tag(i+1)
}
// Count up when a result is enqueued without a simultaneous dequeue, down
// when a result is dequeued without a simultaneous enqueue.
when (valid(0) != (io.cpu.resp_val && io.cpu.resp_rdy)) {
wbq_cnt := Mux(valid(0), wbq_cnt + UFix(1), wbq_cnt - UFix(1))
}

// Total outstanding work: requests still in the pipeline plus results counted
// by wbq_cnt. New requests are accepted only while this is below nwbq.
// NOTE(review): the summed operands are single-bit slices of `valid`; confirm
// the adder result is wide enough not to wrap when nwbq > 1.
var inflight_cnt = valid(0)
for (i <- 1 until IMUL_STAGES)
inflight_cnt = inflight_cnt + valid(i)
inflight_cnt = inflight_cnt + wbq_cnt
val wbq_rdy = inflight_cnt < UFix(nwbq)

// Writeback queue entries hold {result, tag} packed into one word.
val wbq = (new queue(nwbq)) { Bits(width = io.cpu.resp_bits.width + io.cpu.resp_tag.width) }
// NOTE(review): wbq.io.enq.ready is not consulted here; presumably the
// wbq_rdy accounting above guarantees space — confirm.
wbq.io.enq.valid := valid(0)
wbq.io.enq.bits := Cat(io.vu.resp, tag(0))
wbq.io.deq.ready := io.cpu.resp_rdy

// Accept a request only when the external multiplier is ready and the
// in-flight bound allows it.
io.cpu.req.ready := io.vu.req.ready && wbq_rdy
io.cpu.resp_val := wbq.io.deq.valid
// Unpack the queue word: tag in the low bits, result above it.
io.cpu.resp_bits := wbq.io.deq.bits >> UFix(io.cpu.resp_tag.width)
io.cpu.resp_tag := wbq.io.deq.bits(io.cpu.resp_tag.width-1,0)

// Forward the CPU request to the external multiplier.
// NOTE(review): io.cpu.req.ready is also assigned explicitly above; confirm
// how this bulk connection and that assignment are meant to combine.
io.vu.req <> io.cpu.req
}
class rocketMultiplier extends Component {
val io = new ioMultiplier(64);
// width must be even (booth).
val io = new ioMultiplier
// w must be even (booth).
// we need an extra bit to handle signed vs. unsigned,
// so we need to add a second to keep width even.
val width = 64 + 2
// unroll must divide width/2
// so we need to add a second to keep w even.
val w = 64 + 2
val unroll = 3
val cycles = width/unroll/2
require(w % 2 == 0 && (w/2) % unroll == 0)
val cycles = w/unroll/2
val r_val = Reg(resetVal = Bool(false));
val r_dw = Reg { UFix() }
val r_fn = Reg { UFix() }
val r_tag = Reg { UFix() }
val r_dw = Reg { Bits() }
val r_fn = Reg { Bits() }
val r_tag = Reg { Bits() }
val r_lhs = Reg { Bits() }
val r_prod= Reg { Bits(width = width*2) }
val r_prod= Reg { Bits(width = w*2) }
val r_lsb = Reg { Bits() }
val r_cnt = Reg { UFix(width = log2up(cycles+1)) }
val lhs_msb = Mux(io.dw === DW_64, io.in0(63), io.in0(31)).toBool
val lhs_sign = ((io.mul_fn === MUL_HS) || (io.mul_fn === MUL_HSU)) && lhs_msb
val lhs_hi = Mux(io.dw === DW_64, io.in0(63,32), Fill(32, lhs_sign))
val lhs_in = Cat(lhs_sign, lhs_hi, io.in0(31,0))
val dw = io.req.bits.fn(io.req.bits.fn.width-1)
val fn = io.req.bits.fn(io.req.bits.fn.width-2,0)
val rhs_msb = Mux(io.dw === DW_64, io.in1(63), io.in1(31)).toBool
val rhs_sign = (io.mul_fn === MUL_HS) && rhs_msb
val rhs_hi = Mux(io.dw === DW_64, io.in1(63,32), Fill(32, rhs_sign))
val rhs_in = Cat(rhs_sign, rhs_sign, rhs_hi, io.in1(31,0))
val lhs_msb = Mux(dw === DW_64, io.req.bits.in0(63), io.req.bits.in0(31)).toBool
val lhs_sign = ((fn === MUL_H) || (fn === MUL_HSU)) && lhs_msb
val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(63,32), Fill(32, lhs_sign))
val lhs_in = Cat(lhs_sign, lhs_hi, io.req.bits.in0(31,0))
val do_kill = io.mul_kill && r_cnt === UFix(0) // can only kill on 1st cycle
val rhs_msb = Mux(dw === DW_64, io.req.bits.in1(63), io.req.bits.in1(31)).toBool
val rhs_sign = (fn === MUL_H) && rhs_msb
val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(63,32), Fill(32, rhs_sign))
val rhs_in = Cat(rhs_sign, rhs_sign, rhs_hi, io.req.bits.in1(31,0))
when (io.mul_val && io.mul_rdy) {
val do_kill = io.req_kill && r_cnt === UFix(0) // can only kill on 1st cycle
when (io.req.valid && io.req.ready) {
r_val := Bool(true)
r_cnt := UFix(0, log2up(cycles+1))
r_dw := io.dw
r_fn := io.mul_fn
r_tag := io.mul_tag
r_dw := dw
r_fn := fn
r_tag := io.req_tag
r_lhs := lhs_in
r_prod:= rhs_in
r_lsb := Bool(false)
}
.elsewhen (io.result_val && io.result_rdy || do_kill) { // can only kill on first cycle
.elsewhen (io.resp_val && io.resp_rdy || do_kill) { // can only kill on first cycle
r_val := Bool(false)
}
val lhs_sext = Cat(r_lhs(width-2), r_lhs(width-2), r_lhs).toUFix
val lhs_twice = Cat(r_lhs(width-2), r_lhs, Bits(0,1)).toUFix
val lhs_sext = Cat(r_lhs(w-2), r_lhs(w-2), r_lhs).toUFix
val lhs_twice = Cat(r_lhs(w-2), r_lhs, Bits(0,1)).toUFix
var prod = r_prod
var lsb = r_lsb
@ -79,12 +122,12 @@ class rocketMultiplier extends Component {
Mux(prod(0) != prod(1), lhs_twice,
UFix(0)));
val sub = prod(1)
val adder_lhs = Cat(prod(width*2-1), prod(width*2-1,width)).toUFix
val adder_lhs = Cat(prod(w*2-1), prod(w*2-1,w)).toUFix
val adder_rhs = Mux(sub, ~addend, addend)
val adder_out = (adder_lhs + adder_rhs + sub.toUFix)(width,0)
val adder_out = (adder_lhs + adder_rhs + sub.toUFix)(w,0)
lsb = prod(1)
prod = Cat(adder_out(width), adder_out, prod(width-1,2))
prod = Cat(adder_out(w), adder_out, prod(w-1,2))
}
when (r_val && (r_cnt != UFix(cycles))) {
@ -99,10 +142,8 @@ class rocketMultiplier extends Component {
val mul_output = Mux(r_dw === DW_64, mul_output64, mul_output32_ext)
io.mul_rdy := !r_val
io.result := mul_output;
io.result_tag := r_tag;
io.result_val := r_val && (r_cnt === UFix(cycles))
}
io.req.ready := !r_val
io.resp_bits := mul_output;
io.resp_tag := r_tag;
io.resp_val := r_val && (r_cnt === UFix(cycles))
}

View File

@ -14,32 +14,42 @@ class queue[T <: Data](entries: Int, flushable: Boolean = false)(data: => T) ext
{
val io = new ioQueue(flushable)(data)
val enq_ptr = Reg(resetVal = UFix(0, log2up(entries)))
val deq_ptr = Reg(resetVal = UFix(0, log2up(entries)))
val maybe_full = Reg(resetVal = Bool(false))
io.deq.valid := maybe_full || enq_ptr != deq_ptr
io.enq.ready := !maybe_full || enq_ptr != deq_ptr
val do_enq = io.enq.ready && io.enq.valid
val do_deq = io.deq.ready && io.deq.valid
var enq_ptr = UFix(0)
var deq_ptr = UFix(0)
if (entries > 1)
{
enq_ptr = Reg(resetVal = UFix(0, log2up(entries)))
deq_ptr = Reg(resetVal = UFix(0, log2up(entries)))
when (do_deq) {
deq_ptr := deq_ptr + UFix(1)
}
when (do_enq) {
enq_ptr := enq_ptr + UFix(1)
}
if (flushable) {
when (io.flush) {
deq_ptr := UFix(0)
enq_ptr := UFix(0)
}
}
}
val maybe_full = Reg(resetVal = Bool(false))
when (do_enq != do_deq) {
maybe_full := do_enq
}
if (flushable) {
when (io.flush) {
deq_ptr := UFix(0)
enq_ptr := UFix(0)
maybe_full := Bool(false)
}
}
io.deq.valid := maybe_full || enq_ptr != deq_ptr
io.enq.ready := !maybe_full || enq_ptr != deq_ptr
io.deq.bits <> Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr)
}