From 08b6517a2386f5da1b85546c8e33c201be0e617a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 12 Feb 2012 20:12:53 -0800 Subject: [PATCH] add FP ops mftx, mxtf, mtfsr, mffsr --- rocket/src/main/scala/cpu.scala | 2 + rocket/src/main/scala/ctrl.scala | 24 ++-- rocket/src/main/scala/dpath.scala | 7 +- rocket/src/main/scala/fpu.scala | 180 ++++++++++++++++++++++-------- 4 files changed, 156 insertions(+), 57 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 0d0aee76..8e106e0c 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -119,6 +119,8 @@ class rocketProc extends Component dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl } + else + ctrl.io.fpu.dec.valid := Bool(false) ctrl.io.ext_mem.req_val := Bool(false) dpath.io.ext_mem.req_val := Bool(false) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6bf607f1..01937237 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -209,10 +209,12 @@ class rocketCtrl extends Component RDCYCLE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), RDINSTRET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - // Instructions that have not yet been implemented - // Faking these for now so akaros will boot - MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,Y), + MFTX_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MFTX_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MXTF_S-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MXTF_D-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), FLW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), FLD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), FSW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), @@ -335,6 +337,7 @@ class rocketCtrl extends Component val wb_reg_exception = Reg(resetVal = Bool(false)); val wb_reg_replay = Reg(resetVal = Bool(false)); val wb_reg_cause = Reg(){UFix()}; + val wb_reg_fp_val = Reg(resetVal = Bool(false)); val take_pc = Wire() { Bool() }; @@ -479,6 +482,7 @@ class rocketCtrl extends Component wb_reg_inst_ei := Bool(false); wb_reg_flush_inst := Bool(false); wb_reg_div_mul_val := Bool(false); + wb_reg_fp_val := Bool(false) } .otherwise { wb_reg_wen := mem_reg_wen; @@ -488,6 +492,7 @@ class rocketCtrl extends Component wb_reg_inst_ei := mem_reg_inst_ei; wb_reg_flush_inst := mem_reg_flush_inst; wb_reg_div_mul_val := mem_reg_div_mul_val; + wb_reg_fp_val := mem_reg_fp_val } val sboard = new rocketCtrlSboard(); @@ -592,7 +597,7 @@ class rocketCtrl extends Component ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) || ex_reg_div_val && !io.dpath.div_rdy || ex_reg_mul_val && !io.dpath.mul_rdy || - io.fpu.nack + ex_reg_fp_val && io.fpu.nack val kill_ex = take_pc_wb || replay_ex mem_reg_replay := replay_ex && !take_pc_wb; @@ -634,8 +639,8 @@ class rocketCtrl extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr || io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) - val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_val || ex_reg_mul_val) || - fp_data_hazard_ex && ex_reg_mem_val + val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_val || ex_reg_mul_val || ex_reg_fp_val) || + fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) // stall for RAW/WAW hazards on LB/LH and mul/div in memory stage. val mem_mem_cmd_bh = @@ -650,7 +655,8 @@ class rocketCtrl extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val) + val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) || + fp_data_hazard_mem && mem_reg_fp_val id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. @@ -664,7 +670,7 @@ class rocketCtrl extends Component io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr || io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr) val id_wb_hazard = data_hazard_wb && (wb_reg_dcache_miss || wb_reg_div_mul_val) || - fp_data_hazard_wb && wb_reg_dcache_miss + fp_data_hazard_wb && (wb_reg_dcache_miss || wb_reg_fp_val) val ctrl_stalld = !take_pc && diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 37cb3d8a..ec87dfe2 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -292,6 +292,8 @@ class rocketDpath extends Component mul.io.mul_tag := ex_reg_waddr; mul.io.in0 := ex_reg_rs1; mul.io.in1 := ex_reg_rs2; + + io.fpu.fromint_data := ex_reg_rs1 io.ctrl.mul_rdy := mul.io.mul_rdy io.ctrl.mul_result_val := mul.io.result_val; @@ -302,7 +304,7 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module io.dmem.req_addr := ex_effective_address.toUFix; - io.dmem.req_data := (if (HAVE_FPU) Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) else mem_reg_rs2) + io.dmem.req_data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val, io.ctrl.ex_ext_mem_val).toUFix // processor control regfile read @@ -392,7 +394,8 @@ class rocketDpath extends Component mem_reg_waddr))) val mem_ll_wdata = Mux(div_result_val, div_result, Mux(mul_result_val, mul_result, - mem_reg_wdata)) + Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, + mem_reg_wdata))) val mem_ll_wb = dmem_resp_replay || div_result_val || mul_result_val io.fpu.dmem_resp_val := io.dmem.resp_val && dmem_resp_fpu diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 8b98e01e..1bbb5e13 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -7,40 +7,41 @@ import Instructions._ object rocketFPConstants { - val FCMD_ADD = Bits("b000000") - val FCMD_SUB = Bits("b000001") - val FCMD_MUL = Bits("b000010") - val FCMD_DIV = Bits("b000011") - val FCMD_SQRT = Bits("b000100") - val FCMD_SGNINJ = Bits("b000101") - val FCMD_SGNINJN = Bits("b000110") - val FCMD_SGNMUL = Bits("b000111") - val FCMD_TRUNC_L = Bits("b001000") - val FCMD_TRUNCU_L = Bits("b001001") - val FCMD_TRUNC_W = Bits("b001010") - val FCMD_TRUNCU_W = Bits("b001011") - val FCMD_CVT_L = Bits("b001100") - val FCMD_CVTU_L = Bits("b001101") - val FCMD_CVT_W = Bits("b001110") - val FCMD_CVTU_W = Bits("b001111") - val FCMD_CVT_S = Bits("b010000") - val FCMD_CVT_D = Bits("b010001") - val FCMD_C_EQ = Bits("b010101") - val FCMD_C_LT = Bits("b010110") - val FCMD_C_LE = Bits("b010111") - val FCMD_MIN = Bits("b011000") - val FCMD_MAX = Bits("b011001") - val FCMD_MF = Bits("b011100") - val FCMD_MFFSR = Bits("b011101") - val FCMD_MT = Bits("b011110") - val FCMD_MTFSR = Bits("b011111") - val FCMD_MADD = Bits("b100100") - val FCMD_MSUB = Bits("b100101") - val FCMD_NMSUB = Bits("b100110") - val FCMD_NMADD = Bits("b100111") - val FCMD_LOAD = Bits("b111000") - val FCMD_STORE = Bits("b111001") - val FCMD_WIDTH = 6 + val FCMD_ADD = Bits("b000000") + val FCMD_SUB = Bits("b000001") + val FCMD_MUL = Bits("b000010") + val FCMD_DIV = Bits("b000011") + val FCMD_SQRT = Bits("b000100") + val FCMD_SGNINJ = Bits("b000101") + val FCMD_SGNINJN = Bits("b000110") + val FCMD_SGNMUL = Bits("b000111") + val FCMD_CVT_L_FMT = Bits("b001000") + val FCMD_CVT_LU_FMT = Bits("b001001") + val FCMD_CVT_W_FMT = Bits("b001010") + val FCMD_CVT_WU_FMT = Bits("b001011") + val FCMD_CVT_FMT_L = Bits("b001100") + val FCMD_CVT_FMT_LU = Bits("b001101") + val FCMD_CVT_FMT_W = Bits("b001110") + val FCMD_CVT_FMT_WU = Bits("b001111") + val FCMD_CVT_FMT_S = Bits("b010000") + val FCMD_CVT_FMT_D = Bits("b010001") + val FCMD_EQ = Bits("b010101") + val FCMD_LT = Bits("b010110") + val FCMD_LE = Bits("b010111") + val FCMD_MIN = Bits("b011000") + val FCMD_MAX = Bits("b011001") + val FCMD_MFTX = Bits("b011100") + val FCMD_MFFSR = Bits("b011101") + val FCMD_MXTF = Bits("b011110") + val FCMD_MTFSR = Bits("b011111") + val FCMD_MADD = Bits("b100100") + val FCMD_MSUB = Bits("b100101") + val FCMD_NMSUB = Bits("b100110") + val FCMD_NMADD = Bits("b100111") + val FCMD_LOAD = Bits("b111000") + val FCMD_STORE = Bits("b111001") + val FCMD_WIDTH = 6 + val FSR_WIDTH = 8 } import rocketFPConstants._ @@ -145,8 +146,12 @@ class rocketFPUDecoder extends Component FLD -> List(Y,FCMD_LOAD, Y,N,N,N,N,N,N,N,N), FSW -> List(Y,FCMD_STORE, N,N,Y,N,Y,N,N,Y,N), FSD -> List(Y,FCMD_STORE, N,N,Y,N,N,N,N,Y,N), - MTFSR -> List(Y,FCMD_MTFSR, N,N,N,N,X,N,Y,N,Y), - MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,X,N,Y,N,Y) + MXTF_S -> List(Y,FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N), + MXTF_D -> List(Y,FCMD_MXTF, Y,N,N,N,N,Y,N,N,N), + MFTX_S -> List(Y,FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N), + MFTX_D -> List(Y,FCMD_MFTX, N,Y,N,N,N,N,Y,N,N), + MTFSR -> List(Y,FCMD_MTFSR, N,N,N,N,Y,Y,Y,N,Y), + MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,Y,N,Y,N,Y) )) val valid :: cmd :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: store :: fsr :: Nil = decoder @@ -165,8 +170,10 @@ class rocketFPUDecoder extends Component class ioDpathFPU extends Bundle { val inst = Bits(32, OUTPUT) + val fromint_data = Bits(64, OUTPUT) val store_data = Bits(64, INPUT) + val toint_data = Bits(64, INPUT) val dmem_resp_val = Bool(OUTPUT) val dmem_resp_tag = UFix(5, OUTPUT) @@ -186,17 +193,62 @@ class rocketFPIntUnit extends Component val io = new Bundle { val single = Bool(INPUT) val cmd = Bits(FCMD_WIDTH, INPUT) + val fsr = Bits(FSR_WIDTH, INPUT) val in = Bits(65, INPUT) - val out = Bits(64, OUTPUT) + val store_data = Bits(64, OUTPUT) + val toint_data = Bits(64, OUTPUT) + val exc = Bits(5, OUTPUT) } val unrecoded_s = io.in(31,0) val unrecoded_d = io.in - val out_s = unrecoded_s - val out_d = unrecoded_d + io.store_data := Mux(io.single, Cat(unrecoded_s, unrecoded_s), unrecoded_d) - io.out := Mux(io.single, Cat(out_s, out_s), out_d) + val scmp = Bool(false) + val scmp_exc = Bits(0) + + val s2i = UFix(0) + val s2i_exc = Bits(0) + + val dcmp = Bool(false) + val dcmp_exc = Bits(0) + + val d2i = UFix(0) + val d2i_exc = Bits(0) + + // output muxing + val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) + out_s := Cat(Fill(32, unrecoded_s(31)), unrecoded_s) + exc_s := Bits(0) + val (out_d, exc_d) = (Wire() { Bits() }, Wire() { Bits() }) + out_d := unrecoded_d + exc_d := Bits(0) + + when (io.cmd === FCMD_MTFSR || io.cmd === FCMD_MFFSR) { + out_s := io.fsr + } + when (io.cmd === FCMD_CVT_W_FMT || io.cmd === FCMD_CVT_WU_FMT) { + out_s := Cat(Fill(32, s2i(31)), s2i(31,0)) + exc_s := s2i_exc + out_d := Cat(Fill(32, d2i(31)), d2i(31,0)) + exc_d := d2i_exc + } + when (io.cmd === FCMD_CVT_L_FMT || io.cmd === FCMD_CVT_LU_FMT) { + out_s := s2i + exc_s := s2i_exc + out_d := d2i + exc_d := d2i_exc + } + when (io.cmd === FCMD_EQ || io.cmd === FCMD_LT || io.cmd === FCMD_LE) { + out_s := scmp + exc_s := scmp_exc + out_d := dcmp + exc_d := dcmp_exc + } + + io.toint_data := Mux(io.single, out_s, out_d) + io.exc := Mux(io.single, exc_s, exc_d) } class rocketFPU extends Component @@ -229,21 +281,36 @@ class rocketFPU extends Component load_wb_tag := io.dpath.dmem_resp_tag } + val fsr_rm = Reg() { Bits(width = 3) } + val fsr_exc = Reg() { Bits(width = 5) } + // regfile val regfile = Mem(32, load_wb, load_wb_tag, load_wb_data); regfile.setReadLatency(0); regfile.setTarget('inst); - val ex_rs1 = regfile.read(reg_inst(16,12)) + val ex_rs1 = regfile.read(reg_inst(26,22)) val ex_rs2 = regfile.read(reg_inst(21,17)) - val ex_rs3 = regfile.read(reg_inst(26,22)) + val ex_rs3 = regfile.read(reg_inst(16,12)) + val fp_fromint_val = Reg(resetVal = Bool(false)) + val fp_fromint_data = Reg() { Bits() } + val fp_toint_val = Reg(resetVal = Bool(false)) val fp_toint_data = Reg() { Bits() } val fp_toint_single = Reg() { Bool() } val fp_toint_cmd = Reg() { Bits() } + val fp_waddr = Reg() { Bits() } - when (reg_valid) { + fp_fromint_val := Bool(false) + fp_toint_val := Bool(false) + when (reg_valid && !io.ctrl.killx) { + fp_waddr := reg_inst(31,27) + when (ctrl.fromint) { + fp_fromint_val := Bool(true) + fp_fromint_data := io.dpath.fromint_data + } when (ctrl.toint) { + fp_toint_val := Bool(true) fp_toint_data := ex_rs1 } when (ctrl.store) { @@ -259,12 +326,33 @@ class rocketFPU extends Component val fpiu = new rocketFPIntUnit fpiu.io.single := ctrl.single fpiu.io.cmd := ctrl.cmd + fpiu.io.fsr := Cat(fsr_rm, fsr_exc) fpiu.io.in := fp_toint_data - io.dpath.store_data := fpiu.io.out + io.dpath.store_data := fpiu.io.store_data + io.dpath.toint_data := fpiu.io.toint_data - val fsr_busy = ctrl.fsr && Bool(false) + val retire_toint = Reg(!io.ctrl.killm && fp_toint_val, resetVal = Bool(false)) + val retire_toint_exc = Reg(fpiu.io.exc) + val retire_fromint = Reg(!io.ctrl.killm && fp_fromint_val, resetVal = Bool(false)) + val retire_fromint_wdata = Reg(fp_fromint_data) + val retire_fromint_waddr = Reg(fp_waddr) + + when (retire_toint) { + fsr_exc := fsr_exc | retire_toint_exc + } + when (retire_toint && retire_fromint) { // MTFSR + fsr_exc := retire_fromint_wdata(4,0) + fsr_rm := retire_fromint_wdata(7,5) + } + + regfile.write(retire_fromint_waddr, retire_fromint_wdata, retire_fromint && !retire_toint) + + val fp_inflight = fp_toint_val || retire_toint || fp_fromint_val || retire_fromint + val mtfsr_inflight = fp_toint_val && fp_fromint_val || retire_toint && retire_fromint + val fsr_busy = ctrl.fsr && fp_inflight || mtfsr_inflight val units_busy = Bool(false) - io.ctrl.nack := reg_valid && (fsr_busy || units_busy) + val write_port_busy = Bool(false) + io.ctrl.nack := fsr_busy || units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs }