diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 4159e952..1568abb5 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -244,12 +244,17 @@ object Constants val VCMD_TF = UFix(3, 3) val VCMD_MX = UFix(4, 3) val VCMD_MF = UFix(5, 3) + val VCMD_A = UFix(6, 3) val VCMD_X = UFix(0, 3) val VIMM_VLEN = UFix(0, 1) val VIMM_ALU = UFix(1, 1) val VIMM_X = UFix(0, 1) + val VIMM2_RS2 = UFix(0, 1) + val VIMM2_ALU = UFix(1, 1) + val VIMM2_X = UFix(0, 1) + val DTLB_CPU = 0 val DTLB_VEC = 1 val DTLB_VPF = 2 diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 0ba3f74b..90039b1f 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -169,7 +169,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.vec_ximm1q.bits := dpath.io.vec_iface.vximm1q_bits vu.io.vec_ximm2q.valid := ctrl.io.vec_iface.vximm2q_valid vu.io.vec_ximm2q.bits := dpath.io.vec_iface.vximm2q_bits - vu.io.vec_cntq <> dpath.io.vec_iface.vcntq + vu.io.vec_cntq.valid := ctrl.io.vec_iface.vcntq_valid + vu.io.vec_cntq.bits := dpath.io.vec_iface.vcntq_bits // prefetch queues vu.io.vec_pfcmdq.valid := ctrl.io.vec_iface.vpfcmdq_valid @@ -178,22 +179,31 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.vec_pfximm1q.bits := dpath.io.vec_iface.vximm1q_bits vu.io.vec_pfximm2q.valid := ctrl.io.vec_iface.vpfximm2q_valid vu.io.vec_pfximm2q.bits := dpath.io.vec_iface.vximm2q_bits + // vu.io.vec_pfcntq.valid := ctrl.io.vec_iface.vpfcntq_valid + // vu.io.vec_pfcntq.bits := dpath.io.vec_iface.vcntq_bits // don't have to use pf ready signals // if cmdq is not a load or store ctrl.io.vec_iface.vcmdq_ready := vu.io.vec_cmdq.ready ctrl.io.vec_iface.vximm1q_ready := vu.io.vec_ximm1q.ready ctrl.io.vec_iface.vximm2q_ready := vu.io.vec_ximm2q.ready + ctrl.io.vec_iface.vcntq_ready := vu.io.vec_cntq.ready ctrl.io.vec_iface.vpfcmdq_ready := vu.io.vec_pfcmdq.ready ctrl.io.vec_iface.vpfximm1q_ready := vu.io.vec_pfximm1q.ready ctrl.io.vec_iface.vpfximm2q_ready := vu.io.vec_pfximm2q.ready + // ctrl.io.vec_iface.vpfcntq_ready := vu.io.vec_pfcntq.ready + ctrl.io.vec_iface.vpfcntq_ready := Bool(true) + ctrl.io.vec_iface.vackq_valid := vu.io.vec_ackq.valid vu.io.vec_ackq.ready := ctrl.io.vec_iface.vackq_ready // exceptions vu.io.cpu_exception.addr := dpath.io.vec_iface.eaddr.toUFix vu.io.cpu_exception.exception := dpath.io.vec_iface.exception - ctrl.io.vec_iface.exception_done := vu.io.done + ctrl.io.vec_iface.exception_ack_valid := vu.io.exception_ack_valid + vu.io.exception_ack_ready := ctrl.io.vec_iface.exception_ack_ready + ctrl.io.vec_iface.kill_ack_valid := vu.io.kill_ack_valid + vu.io.kill_ack_ready := ctrl.io.vec_iface.kill_ack_ready // hooking up vector memory interface val storegen = new StoreDataGen diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ac544a68..cc1d7f69 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -279,7 +279,15 @@ class rocketCtrl extends Component VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N) + VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + + // Vector Supervisor Stuff + VENQCMD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), + VENQIMM1-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), + VENQIMM2-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), + VENQCNT-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), + VWAITXCPT-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), + VWAITKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y) )) val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs @@ -581,7 +589,7 @@ class rocketCtrl extends Component } var vec_replay = Bool(false) - var vec_cpfence = Bool(false) + var vec_stalld = Bool(false) if (HAVE_VEC) { // vector control @@ -594,7 +602,7 @@ class rocketCtrl extends Component vec.io.exception := wb_reg_exception vec_replay = vec.io.replay - vec_cpfence = vec.io.cpfence + vec_stalld = vec.io.stalld } // exception handling @@ -749,7 +757,7 @@ class rocketCtrl extends Component id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || id_vec_val.toBool && !(io.vec_iface.vcmdq_ready && io.vec_iface.vximm1q_ready && io.vec_iface.vximm2q_ready) || // being conservative ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || - vec_cpfence + vec_stalld ); val ctrl_stallf = ctrl_stalld; diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 3e8aefd5..43c6c321 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -10,11 +10,11 @@ class ioCtrlDpathVec extends Bundle val valid = Bool(INPUT) val inst = Bits(32, INPUT) val appvl0 = Bool(INPUT) - val replay_cntq = Bool(INPUT) val wen = Bool(OUTPUT) val fn = Bits(1, OUTPUT) val sel_vcmd = Bits(3, OUTPUT) val sel_vimm = Bits(1, OUTPUT) + val sel_vimm2 = Bits(1, OUTPUT) } class ioCtrlVecInterface extends Bundle @@ -25,6 +25,8 @@ class ioCtrlVecInterface extends Bundle val vximm1q_ready = Bool(INPUT) val vximm2q_valid = Bool(OUTPUT) val vximm2q_ready = Bool(INPUT) + val vcntq_valid = Bool(OUTPUT) + val vcntq_ready = Bool(INPUT) val vpfcmdq_valid = Bool(OUTPUT) val vpfcmdq_ready = Bool(INPUT) @@ -32,11 +34,17 @@ class ioCtrlVecInterface extends Bundle val vpfximm1q_ready = Bool(INPUT) val vpfximm2q_valid = Bool(OUTPUT) val vpfximm2q_ready = Bool(INPUT) + val vpfcntq_valid = Bool(OUTPUT) + val vpfcntq_ready = Bool(INPUT) val vackq_valid = Bool(INPUT) val vackq_ready = Bool(OUTPUT) - val exception_done = Bool(INPUT) + val exception_ack_valid = Bool(INPUT) + val exception_ack_ready = Bool(OUTPUT) + + val kill_ack_valid = Bool(INPUT) + val kill_ack_ready = Bool(OUTPUT) } class ioCtrlVec extends Bundle @@ -46,7 +54,7 @@ class ioCtrlVec extends Bundle val sr_ev = Bool(INPUT) val exception = Bool(INPUT) val replay = Bool(OUTPUT) - val cpfence = Bool(OUTPUT) + val stalld = Bool(OUTPUT) } class rocketCtrlVec extends Component @@ -55,129 +63,156 @@ class rocketCtrlVec extends Component val veccs = ListLookup(io.dpath.inst, - // appvlmask - // | vcmdq - // | | vximm1q - // | | | vximm2q - // | | | | vpfcmdq - // wen | | | | | vpximm1q - // val vcmd vimm | fn | | | | | | vpximm2q - // | | | | | | | | | | | | cpfence - // | | | | | | | | | | | | | - List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N,Y,Y,N,N,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N,Y,Y,N,N,N), - VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N,N,N,N,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, N,VEC_X ,Y,Y,N,N,N,N,N,N,N), - FENCE_L_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,Y,N), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,N,N,N,N,N,N,N,Y), - VLD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N) + // appvlmask + // | vcmdq + // | | vximm1q + // | | | vximm2q + // | | | | vcntq + // | | | | | vpfcmdq + // | | | | | | vpfximm1q + // | | | | | | | vpfximm2q + // wen | | | | | | | | vpfcntq + // val vcmd vimm vimm2 | fn | | | | | | | | | stalld + // | | | | | | | | | | | | | | | | waitxcpt + // | | | | | | | | | | | | | | | | | + List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFG,N,Y,Y,N,N,Y,Y,N,N,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N), + VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,Y,N), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,Y,N), + VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,Y,N,N,N,N,N), + VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_X, N,N,Y,N,N,N,Y,N,N,N,N), + VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,Y,N,N,N,Y,N,N,N), + VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,Y,N,N,N,Y,N,N), + VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y), + VWAITKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y) )) - val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs - val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: veccs1 = veccs0 - val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_cpfence :: wb_vec_xcptfence :: Nil = veccs1 + val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_sel_vimm2 :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs + val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: wb_vec_cntq_enq :: veccs1 = veccs0 + val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_pfcntq_enq :: veccs2 = veccs1 + val wb_vec_stalld :: wb_vec_waitxcpt :: Nil = veccs2 - val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val.toBool && !(wb_vec_appvlmask.toBool && io.dpath.appvl0) + val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val && !(wb_vec_appvlmask && io.dpath.appvl0) val mask_wb_vec_cmdq_ready = !wb_vec_cmdq_enq || io.iface.vcmdq_ready val mask_wb_vec_ximm1q_ready = !wb_vec_ximm1q_enq || io.iface.vximm1q_ready val mask_wb_vec_ximm2q_ready = !wb_vec_ximm2q_enq || io.iface.vximm2q_ready + val mask_wb_vec_cntq_ready = !wb_vec_cntq_enq || io.iface.vcntq_ready val mask_wb_vec_pfcmdq_ready = !wb_vec_pfcmdq_enq || io.iface.vpfcmdq_ready val mask_wb_vec_pfximm1q_ready = !wb_vec_pfximm1q_enq || io.iface.vpfximm1q_ready val mask_wb_vec_pfximm2q_ready = !wb_vec_pfximm2q_enq || io.iface.vpfximm2q_ready + val mask_wb_vec_pfcntq_ready = !wb_vec_pfcntq_enq || io.iface.vpfcntq_ready io.dpath.wen := wb_vec_wen.toBool io.dpath.fn := wb_vec_fn io.dpath.sel_vcmd := wb_sel_vcmd io.dpath.sel_vimm := wb_sel_vimm + io.dpath.sel_vimm2 := wb_sel_vimm2 io.iface.vcmdq_valid := valid_common && - wb_vec_cmdq_enq && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + wb_vec_cmdq_enq && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vximm1q_valid := valid_common && - mask_wb_vec_cmdq_ready && wb_vec_ximm1q_enq && mask_wb_vec_ximm2q_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + mask_wb_vec_cmdq_ready && wb_vec_ximm1q_enq && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vximm2q_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && wb_vec_ximm2q_enq && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && wb_vec_ximm2q_enq && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready + + io.iface.vcntq_valid := + valid_common && + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && wb_vec_cntq_enq && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vpfcmdq_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && - wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vpfximm1q_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && - mask_wb_vec_pfcmdq_ready && wb_vec_pfximm1q_enq && mask_wb_vec_pfximm2q_ready + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && wb_vec_pfximm1q_enq && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vpfximm2q_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && wb_vec_pfximm2q_enq + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && wb_vec_pfximm2q_enq && mask_wb_vec_pfcntq_ready + + io.iface.vpfcntq_valid := + valid_common && + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && wb_vec_pfcntq_enq io.iface.vackq_ready := Bool(true) + io.iface.exception_ack_ready := Bool(true) + io.iface.kill_ack_ready := Bool(true) io.replay := valid_common && ( wb_vec_cmdq_enq && !io.iface.vcmdq_ready || wb_vec_ximm1q_enq && !io.iface.vximm1q_ready || wb_vec_ximm2q_enq && !io.iface.vximm2q_ready || + wb_vec_cntq_enq && !io.iface.vcntq_ready || wb_vec_pfcmdq_enq && !io.iface.vpfcmdq_ready || wb_vec_pfximm1q_enq && !io.iface.vpfximm1q_ready || wb_vec_pfximm2q_enq && !io.iface.vpfximm2q_ready || - io.dpath.replay_cntq + wb_vec_pfcntq_enq && !io.iface.vpfcntq_ready ) - val reg_cpfence = Reg(resetVal = Bool(false)) - val do_cpfence = valid_common && wb_vec_cpfence && !io.replay + val reg_stalld = Reg(resetVal = Bool(false)) + val do_stalld = valid_common && wb_vec_stalld && !io.replay - when (do_cpfence) { reg_cpfence := Bool(true) } - when (io.iface.vackq_valid || io.exception) { reg_cpfence := Bool(false) } + when (do_stalld) { reg_stalld := Bool(true) } + when (io.iface.vackq_valid || io.exception) { reg_stalld := Bool(false) } - val reg_xcptfence = Reg(resetVal = Bool(false)) - val do_xcptfence = valid_common && wb_vec_xcptfence && !io.replay + val reg_waitxcpt = Reg(resetVal = Bool(false)) + val do_waitxcpt = valid_common && wb_vec_waitxcpt && !io.replay - when (do_xcptfence) { reg_xcptfence := Bool(true) } - when (io.iface.exception_done) { reg_xcptfence := Bool(false) } + when (do_waitxcpt) { reg_waitxcpt := Bool(true) } + when (io.iface.exception_ack_valid) { reg_waitxcpt := Bool(false) } + when (io.iface.kill_ack_valid) { reg_waitxcpt := Bool(false) } - io.cpfence := reg_cpfence || reg_xcptfence + io.stalld := reg_stalld || reg_waitxcpt } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a09a3f26..817e001b 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -385,9 +385,6 @@ class rocketDpath extends Component vec.io.rs2 := wb_reg_rs2 vec.io.vec_eaddr := pcr.io.vec_eaddr vec.io.vec_exception := pcr.io.vec_exception - vec.io.pcr_wport.addr := wb_reg_raddr2 - vec.io.pcr_wport.en := io.ctrl.wen_pcr - vec.io.pcr_wport.data := wb_reg_wdata wb_wdata := Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index f801ac5c..012507af 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -11,7 +11,7 @@ class ioDpathVecInterface extends Bundle val vcmdq_bits = Bits(SZ_VCMD, OUTPUT) val vximm1q_bits = Bits(SZ_VIMM, OUTPUT) val vximm2q_bits = Bits(SZ_VSTRIDE, OUTPUT) - val vcntq = (new ioDecoupled()){ Bits(width = 11) } + val vcntq_bits = Bits(SZ_VLEN, OUTPUT) val eaddr = Bits(64, OUTPUT) val exception = Bool(OUTPUT) } @@ -30,7 +30,6 @@ class ioDpathVec extends Bundle val rs2 = Bits(64, INPUT) val vec_eaddr = Bits(64, INPUT) val vec_exception = Bool(INPUT) - val pcr_wport = new ioWritePort() val wen = Bool(OUTPUT) val appvl = UFix(12, OUTPUT) } @@ -123,17 +122,18 @@ class rocketDpathVec extends Component Mux(io.ctrl.sel_vcmd === VCMD_TF, Cat(Bits(1,2), io.inst(13,8), Bits(1,1), io.waddr, Bits(1,1), io.raddr1), Mux(io.ctrl.sel_vcmd === VCMD_MX, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(0,1), io.waddr, Bits(0,1), io.waddr), Mux(io.ctrl.sel_vcmd === VCMD_MF, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(1,1), io.waddr, Bits(1,1), io.waddr), - Bits(0,20))))))) + Mux(io.ctrl.sel_vcmd === VCMD_A, io.wdata(SZ_VCMD-1, 0), + Bits(0,20)))))))) io.iface.vximm1q_bits := Mux(io.ctrl.sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, io.inst(21,10), vlenm1(10,0)), - io.wdata) // VIMM_ALU + io.wdata) // VIMM_ALU - io.iface.vximm2q_bits := io.rs2 + io.iface.vximm2q_bits := + Mux(io.ctrl.sel_vimm2 === VIMM2_RS2, io.rs2, + io.wdata) // VIMM2_ALU - io.iface.vcntq.bits := io.pcr_wport.data - io.iface.vcntq.valid := io.pcr_wport.en && io.pcr_wport.addr === PCR_VEC_CNT - io.ctrl.replay_cntq := io.iface.vcntq.valid && !io.iface.vcntq.ready + io.iface.vcntq_bits := io.wdata(SZ_VLEN-1, 0) io.iface.eaddr := io.vec_eaddr io.iface.exception := io.vec_exception diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 1511e1f9..4491e80d 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -246,6 +246,13 @@ object Instructions val VTCFGIVL = Bits("b?????_?????_????????????_011_1110011",32); val VSETVL = Bits("b?????_?????_000000000000_101_1110011",32); val VF = Bits("b00000_?????_????????????_111_1110011",32); + // vector supervisor instructions + val VENQCMD = Bits("b00000_?????_00000_1000000000_1111011",32) + val VENQIMM1 = Bits("b00000_?????_00000_1000000001_1111011",32) + val VENQIMM2 = Bits("b00000_?????_00000_1000000010_1111011",32) + val VENQCNT = Bits("b00000_?????_00000_1000000011_1111011",32) + val VWAITXCPT = Bits("b00000_00000_00000_1100000000_1111011",32) + val VWAITKILL = Bits("b00000_00000_00000_1100000001_1111011",32) val NOP = ADDI & Bits("b00000000000000000000001111111111", 32); }