1
0

clock gate integer datapath more aggressively

This commit is contained in:
Andrew Waterman
2012-11-17 06:48:44 -08:00
parent cc067026a2
commit 5a7777fe4d
6 changed files with 158 additions and 136 deletions

View File

@ -25,46 +25,42 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
// execute definitions
val ex_reg_pc = Reg() { UFix() };
val ex_reg_inst = Reg() { Bits() };
val ex_reg_raddr1 = Reg() { UFix() };
val ex_reg_raddr2 = Reg() { UFix() };
val ex_reg_op2 = Reg() { Bits() };
val ex_reg_rs2 = Reg() { Bits() };
val ex_reg_rs1 = Reg() { Bits() };
val ex_reg_waddr = Reg() { UFix() };
val ex_reg_ctrl_fn_dw = Reg() { UFix() };
val ex_reg_ctrl_fn_alu = Reg() { UFix() };
val ex_reg_sel_alu2 = Reg() { UFix() };
val ex_reg_ctrl_sel_wb = Reg() { UFix() };
val ex_wdata = Bits()
val ex_reg_kill = Reg() { Bool() }
val ex_reg_kill = Reg{Bool()}
val ex_reg_rs1_bypass = Reg{Bool()}
val ex_reg_rs1_lsb = Reg{Bits()}
val ex_reg_rs1_msb = Reg{Bits()}
val ex_reg_rs2_bypass = Reg{Bool()}
val ex_reg_rs2_lsb = Reg{Bits()}
val ex_reg_rs2_msb = Reg{Bits()}
// memory definitions
val mem_reg_pc = Reg() { UFix() };
val mem_reg_inst = Reg() { Bits() };
val mem_reg_rs1 = Reg() { Bits() };
val mem_reg_rs2 = Reg() { Bits() };
val mem_reg_waddr = Reg() { UFix() };
val mem_reg_wdata = Reg() { Bits() };
val mem_reg_raddr1 = Reg() { UFix() };
val mem_reg_raddr2 = Reg() { UFix() };
val mem_reg_kill = Reg() { Bool() }
// writeback definitions
val wb_reg_pc = Reg() { UFix() };
val wb_reg_inst = Reg() { Bits() };
val wb_reg_rs1 = Reg() { Bits() };
val wb_reg_rs2 = Reg() { Bits() };
val wb_reg_waddr = Reg() { UFix() }
val wb_reg_wdata = Reg() { Bits() }
val wb_reg_vec_waddr = Reg() { UFix() }
val wb_reg_vec_wdata = Reg() { Bits() }
val wb_reg_raddr1 = Reg() { UFix() };
val wb_reg_raddr2 = Reg() { UFix() };
val wb_reg_ll_wb = Reg(resetVal = Bool(false));
val wb_wdata = Bits();
val wb_wen = io.ctrl.wb_wen && io.ctrl.wb_valid || wb_reg_ll_wb
// instruction decode stage
val id_inst = io.imem.resp.bits.data
val id_pc = io.imem.resp.bits.pc
debug(id_inst)
debug(id_pc)
val regfile_ = Mem(31){Bits(width = 64)}
def readRF(a: UFix) = regfile_(~a)
@ -74,45 +70,36 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
val id_raddr2 = id_inst(21,17).toUFix;
// bypass muxes
val id_rs1_dmem_bypass = id_raddr1 != UFix(0) &&
Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, Bool(false),
Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, io.ctrl.mem_load,
Bool(false)))
val id_rs1_zero = id_raddr1 === UFix(0)
val id_rs1_ex_bypass = io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr
val id_rs1_mem_bypass = io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr
val id_rs1_bypass = id_rs1_zero || id_rs1_ex_bypass || id_rs1_mem_bypass
val id_rs1_bypass_src = Mux(id_rs1_zero, UFix(0), Mux(id_rs1_ex_bypass, UFix(1), UFix(2) | io.ctrl.mem_load))
val id_rs1 =
Mux(id_raddr1 === UFix(0), UFix(0),
Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, ex_wdata,
Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata,
Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata,
readRF(id_raddr1)))))
Mux(wb_wen && id_raddr1 === wb_reg_waddr, wb_wdata,
readRF(id_raddr1)))
val id_rs2_dmem_bypass = id_raddr2 != UFix(0) &&
Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, Bool(false),
Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, io.ctrl.mem_load,
Bool(false)))
val id_rs2_zero = id_raddr2 === UFix(0)
val id_rs2_ex_bypass = io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr
val id_rs2_mem_bypass = io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr
val id_rs2_bypass = id_rs2_zero || id_rs2_ex_bypass || id_rs2_mem_bypass
val id_rs2_bypass_src = Mux(id_rs2_zero, UFix(0), Mux(id_rs2_ex_bypass, UFix(1), UFix(2) | io.ctrl.mem_load))
val id_rs2 = Mux(id_raddr2 === UFix(0), UFix(0),
Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, ex_wdata,
Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata,
Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata,
readRF(id_raddr2)))))
Mux(wb_wen && id_raddr2 === wb_reg_waddr, wb_wdata,
readRF(id_raddr2)))
// immediate generation
val id_imm_bj = io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE
val id_imm_l = io.ctrl.sel_alu2 === A2_LTYPE
val id_imm_zero = io.ctrl.sel_alu2 === A2_ZERO || io.ctrl.sel_alu2 === A2_RTYPE
val id_imm_ibz = io.ctrl.sel_alu2 === A2_ITYPE || io.ctrl.sel_alu2 === A2_BTYPE || id_imm_zero
val id_imm_sign = Mux(id_imm_bj, id_inst(31),
Mux(id_imm_l, id_inst(26),
Mux(id_imm_zero, Bits(0,1),
id_inst(21)))) // IMM_ITYPE
val id_imm_small = Mux(id_imm_zero, Bits(0,12),
Cat(Mux(id_imm_bj, id_inst(31,27), id_inst(21,17)), id_inst(16,10)))
val id_imm = Cat(Fill(32, id_imm_sign),
Mux(id_imm_l, Cat(id_inst(26,7), Bits(0,12)),
Mux(id_imm_ibz, Cat(Fill(20, id_imm_sign), id_imm_small),
Cat(Fill(7, id_imm_sign), id_inst(31,7))))) // A2_JTYPE
val id_op2_dmem_bypass = id_rs2_dmem_bypass && io.ctrl.sel_alu2 === A2_RTYPE
val id_op2 = Mux(io.ctrl.sel_alu2 === A2_RTYPE, id_rs2, id_imm)
def imm(sel: Bits, inst: Bits) = {
val lsbs = Mux(sel === A2_LTYPE || sel === A2_ZERO, Bits(0),
Mux(sel === A2_BTYPE, Cat(inst(31,27), inst(16,10)),
Mux(sel === A2_JTYPE, inst(18,7),
inst(21,10))))
val msbs = Mux(sel === A2_LTYPE, inst(26,7),
Mux(sel === A2_JTYPE, inst(31,19).toFix,
Mux(sel === A2_ITYPE, inst(21), inst(31)).toFix))
Cat(msbs, lsbs).toFix
}
io.ctrl.inst := id_inst
io.fpu.inst := id_inst
@ -122,21 +109,47 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
when (!io.ctrl.killd) {
ex_reg_pc := id_pc
ex_reg_inst := id_inst
ex_reg_raddr1 := id_raddr1
ex_reg_raddr2 := id_raddr2
ex_reg_op2 := id_op2
ex_reg_waddr := Mux(io.ctrl.sel_wa === WA_RD, id_inst(31,27).toUFix, RA)
ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUFix
ex_reg_ctrl_fn_alu := io.ctrl.fn_alu
ex_reg_sel_alu2 := io.ctrl.sel_alu2
ex_reg_ctrl_sel_wb := io.ctrl.sel_wb
when (io.ctrl.ren1) { ex_reg_rs1 := id_rs1 }
when (io.ctrl.ren2) { ex_reg_rs2 := id_rs2 }
when (io.ctrl.ren1) {
ex_reg_rs1_bypass := id_rs1_bypass
ex_reg_rs1_lsb := id_rs1_bypass_src
when (!id_rs1_bypass) {
ex_reg_rs1_lsb := id_rs1(id_rs1_bypass_src.getWidth-1,0)
ex_reg_rs1_msb := id_rs1(63,id_rs1_bypass_src.getWidth)
}
}
when (io.ctrl.ren2) {
ex_reg_rs2_bypass := id_rs2_bypass
ex_reg_rs2_lsb := id_rs2_bypass_src
when (!id_rs2_bypass) {
ex_reg_rs2_lsb := id_rs2(id_rs2_bypass_src.getWidth-1,0)
ex_reg_rs2_msb := id_rs2(63,id_rs2_bypass_src.getWidth)
}
}
}
val ex_raddr1 = ex_reg_inst(26,22)
val ex_raddr2 = ex_reg_inst(21,17)
val dmem_resp_data = if (conf.fastLoadByte) io.dmem.resp.bits.data_subword else io.dmem.resp.bits.data
val ex_rs1 = Mux(Reg(id_rs1_dmem_bypass), dmem_resp_data, ex_reg_rs1)
val ex_rs2 = Mux(Reg(id_rs2_dmem_bypass), dmem_resp_data, ex_reg_rs2)
val ex_op2 = Mux(Reg(id_op2_dmem_bypass), dmem_resp_data, ex_reg_op2)
val ex_rs1 =
Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(3), dmem_resp_data,
Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(2), wb_reg_wdata,
Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(1), mem_reg_wdata,
Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(0), Bits(0),
Cat(ex_reg_rs1_msb, ex_reg_rs1_lsb)))))
val ex_rs2 =
Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(3), dmem_resp_data,
Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(2), wb_reg_wdata,
Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(1), mem_reg_wdata,
Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(0), Bits(0),
Cat(ex_reg_rs2_msb, ex_reg_rs2_lsb)))))
val ex_imm = imm(ex_reg_sel_alu2, ex_reg_inst)
val ex_op2 = Mux(ex_reg_sel_alu2 != A2_RTYPE, ex_imm, ex_rs2)
val alu = new ALU
alu.io.dw := ex_reg_ctrl_fn_dw;
@ -149,7 +162,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
div.io.req.valid := io.ctrl.div_val
div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, io.ctrl.div_fn)
div.io.req.bits.in0 := ex_rs1
div.io.req.bits.in1 := ex_op2
div.io.req.bits.in1 := ex_rs2
div.io.req_tag := ex_reg_waddr
div.io.req_kill := io.ctrl.div_kill
div.io.resp_rdy := Bool(true)
@ -168,7 +181,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
mul_io.req.valid := io.ctrl.mul_val
mul_io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, io.ctrl.mul_fn)
mul_io.req.bits.in0 := ex_rs1
mul_io.req.bits.in1 := ex_op2
mul_io.req.bits.in1 := ex_rs2
mul_io.req_tag := ex_reg_waddr
mul_io.req_kill := io.ctrl.mul_kill
mul_io.resp_rdy := Bool(true)
@ -199,11 +212,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
// processor control regfile read
val pcr = new rocketDpathPCR()
pcr.io.r.en := io.ctrl.pcr != PCR_N
pcr.io.r.addr := wb_reg_raddr1
pcr.io.r.addr := wb_reg_inst(26,22).toUFix
pcr.io.host <> io.host
pcr.io <> io.ctrl
pcr.io.pc := wb_reg_pc
io.ctrl.pcr_replay := pcr.io.replay
io.ptw.ptbr := pcr.io.ptbr
@ -211,7 +223,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
io.ptw.status := pcr.io.status
// branch resolution logic
io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0)
io.ctrl.jalr_eq := ex_rs1 === id_pc.toFix && ex_reg_inst(11,0) === UFix(0)
io.ctrl.ex_br_taken :=
Mux(io.ctrl.ex_br_type === BR_EQ, ex_rs1 === ex_rs2,
Mux(io.ctrl.ex_br_type === BR_NE, ex_rs1 != ex_rs2,
@ -221,21 +233,17 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
Mux(io.ctrl.ex_br_type === BR_GEU, ex_rs1 >= ex_rs2,
io.ctrl.ex_br_type === BR_J))))))
val ex_pc_plus4 = ex_reg_pc + UFix(4)
val ex_branch_target = ex_reg_pc + Cat(ex_reg_op2(VADDR_BITS-1,0), Bits(0,1)).toUFix
val ex_pc_plus4 = ex_reg_pc + 4
val ex_branch_target = (ex_reg_pc.toFix + (ex_imm << 1)).toUFix
// time stamp counter
val tsc_reg = Reg(resetVal = UFix(0,64));
tsc_reg := tsc_reg + UFix(1);
// instructions retired counter
val irt_reg = Reg(resetVal = UFix(0,64));
when (io.ctrl.wb_valid) { irt_reg := irt_reg + UFix(1); }
val tsc_reg = WideCounter(64)
val irt_reg = WideCounter(64, io.ctrl.wb_valid)
// writeback select mux
ex_wdata :=
val ex_wdata =
Mux(ex_reg_ctrl_sel_wb === WB_PC, ex_pc_plus4.toFix,
Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg,
Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg,
Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg.value,
Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg.value,
alu.io.out))).toBits // WB_ALU
// memory stage
@ -243,11 +251,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
when (!ex_reg_kill) {
mem_reg_pc := ex_reg_pc
mem_reg_inst := ex_reg_inst
mem_reg_rs1 := ex_rs1
mem_reg_rs2 := StoreGen(io.ctrl.ex_mem_type, Bits(0), ex_rs2).data
mem_reg_waddr := ex_reg_waddr
mem_reg_wdata := ex_wdata
mem_reg_raddr1 := ex_reg_raddr1
mem_reg_raddr2 := ex_reg_raddr2
}
// for load/use hazard detection (load byte/halfword)
@ -276,6 +283,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
io.ctrl.mem_ll_waddr := dmem_resp_waddr
io.ctrl.mem_ll_wb := Bool(true)
}
when (io.ctrl.mem_ll_waddr === UFix(0)) { io.ctrl.mem_ll_wb := Bool(false) }
io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu
io.fpu.dmem_resp_data := io.dmem.resp.bits.data
@ -286,11 +294,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
when (!mem_reg_kill) {
wb_reg_pc := mem_reg_pc
wb_reg_inst := mem_reg_inst
wb_reg_rs1 := mem_reg_rs1
wb_reg_rs2 := mem_reg_rs2
wb_reg_vec_waddr := mem_reg_waddr
wb_reg_vec_wdata := mem_reg_wdata
wb_reg_raddr1 := mem_reg_raddr1
wb_reg_raddr2 := mem_reg_raddr2
wb_reg_waddr := mem_reg_waddr
wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata)
}
@ -299,6 +304,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
wb_reg_waddr := io.ctrl.mem_ll_waddr
wb_reg_wdata := mem_ll_wdata
}
wb_wdata := Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword,
Mux(io.ctrl.pcr != PCR_N, pcr.io.r.data,
wb_reg_wdata))
if (HAVE_VEC)
{
@ -310,11 +318,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
vec.io.valid := io.ctrl.wb_valid && pcr.io.status(SR_EV)
vec.io.inst := wb_reg_inst
vec.io.waddr := wb_reg_vec_waddr
vec.io.raddr1 := wb_reg_raddr1
vec.io.vecbank := pcr.io.vecbank
vec.io.vecbankcnt := pcr.io.vecbankcnt
vec.io.wdata := wb_reg_vec_wdata
vec.io.wdata := wb_reg_wdata
vec.io.rs2 := wb_reg_rs2
pcr.io.vec_irq_aux := vec.io.irq_aux
@ -322,29 +328,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
pcr.io.vec_nxregs := vec.io.nxregs
pcr.io.vec_nfregs := vec.io.nfregs
wb_wdata :=
Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl),
Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword,
wb_reg_wdata))
}
else
{
pcr.io.vec_irq_aux := UFix(0)
pcr.io.vec_appvl := UFix(0)
pcr.io.vec_nxregs := UFix(0)
pcr.io.vec_nfregs := UFix(0)
wb_wdata :=
Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword,
wb_reg_wdata)
when (vec.io.wen) { wb_wdata := vec.io.appvl }
}
val rf_wen = io.ctrl.wb_wen || wb_reg_ll_wb
val rf_waddr = wb_reg_waddr
val rf_wdata = Mux(io.ctrl.wb_wen && io.ctrl.pcr != PCR_N, pcr.io.r.data, wb_wdata)
List(rf_wen, rf_waddr, rf_wdata).map(debug _)
when (rf_wen) { writeRF(rf_waddr, rf_wdata) }
when (wb_wen) { writeRF(wb_reg_waddr, wb_wdata) }
io.ctrl.wb_waddr := wb_reg_waddr
// scoreboard clear (for div/mul and D$ load miss writebacks)
@ -352,7 +339,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
io.ctrl.fp_sboard_clra := dmem_resp_waddr
// processor control regfile write
pcr.io.w.addr := wb_reg_raddr1
pcr.io.w.addr := wb_reg_inst(26,22).toUFix
pcr.io.w.en := io.ctrl.pcr === PCR_T || io.ctrl.pcr === PCR_S || io.ctrl.pcr === PCR_C
pcr.io.w.data := Mux(io.ctrl.pcr === PCR_S, pcr.io.r.data | wb_reg_wdata,
Mux(io.ctrl.pcr === PCR_C, pcr.io.r.data & ~wb_reg_wdata,
@ -365,4 +352,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component
Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target),
Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec).toUFix,
wb_reg_pc))) // PC_WB
// expose debug signals to testbench
// XXX debug() doesn't right, so create a false dependence
val debugList = List(wb_reg_pc, wb_reg_inst, wb_wen, wb_reg_waddr, wb_wdata, wb_reg_rs1, wb_reg_rs2)
pcr.io.pc := wb_reg_pc | (debugList.map(d => d^d).reduce(_|_)).toUFix
debugList.foreach(debug _)
}