diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index ac5de59d..731f24ba 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -16,25 +16,20 @@ object Constants val HTIF_WIDTH = 16 val MEM_BACKUP_WIDTH = HTIF_WIDTH - val BR_X = Bits("b????", 4) - val BR_N = UFix(0, 4); - val BR_EQ = UFix(1, 4); - val BR_NE = UFix(2, 4); - val BR_LT = UFix(3, 4); - val BR_LTU = UFix(4, 4); - val BR_GE = UFix(5, 4); - val BR_GEU = UFix(6, 4); - val BR_J = UFix(7, 4); - val BR_JR = UFix(8, 4); + val BR_X = Bits("b???", 3) + val BR_EQ = UFix(0, 3) + val BR_NE = UFix(1, 3) + val BR_J = UFix(2, 3) + val BR_N = UFix(3, 3) + val BR_LT = UFix(4, 3) + val BR_GE = UFix(5, 3) + val BR_LTU = UFix(6, 3) + val BR_GEU = UFix(7, 3) - val PC_4 = UFix(0, 3); - val PC_BTB = UFix(1, 3); - val PC_EX4 = UFix(2, 3); - val PC_BR = UFix(3, 3); - val PC_PCR = UFix(4, 3); - val PC_WB = UFix(5, 3); - val PC_EVEC = UFix(6, 3); - val PC_JR = UFix(7, 3); + val PC_EX4 = UFix(0, 2) + val PC_EX = UFix(1, 2) + val PC_WB = UFix(2, 2) + val PC_PCR = UFix(3, 2) val A2_X = Bits("b???", 3) val A2_BTYPE = UFix(0, 3); @@ -70,19 +65,6 @@ object Constants val WB_TSC = UFix(4, 3); val WB_IRT = UFix(5, 3); - val FN_X = Bits("b????", 4) - val FN_ADD = UFix(0, 4); - val FN_SUB = UFix(1, 4); - val FN_SLT = UFix(2, 4); - val FN_SLTU = UFix(3, 4); - val FN_AND = UFix(4, 4); - val FN_OR = UFix(5, 4); - val FN_XOR = UFix(6, 4); - val FN_SL = UFix(7, 4); - val FN_SR = UFix(8, 4); - val FN_SRA = UFix(9, 4); - val FN_OP2 = UFix(10, 4); - val DW_X = X val DW_32 = N val DW_64 = Y @@ -175,6 +157,7 @@ object Constants val PERM_BITS = 6; // rocketNBDCache parameters + val INST_BITS = 32 val DCACHE_PORTS = 3 val CPU_DATA_BITS = 64; val CPU_TAG_BITS = 9; @@ -212,8 +195,9 @@ object Constants val MEM_DATA_BITS = 128 val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS + val BTB_ENTRIES = 8 + val ITLB_ENTRIES = 8 val DTLB_ENTRIES = 16 - val ITLB_ENTRIES = 8; val VITLB_ENTRIES = 4 val START_ADDR = 0x2000; diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index c0a2f1c7..ec0178c1 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -8,8 +8,8 @@ import hwacha._ class ioRocket extends Bundle() { val host = new ioHTIF - val imem = (new ioImem).flip - val vimem = (new ioImem).flip + val imem = new IOCPUFrontend + val vimem = new IOCPUFrontend val dmem = new ioHellaCache } @@ -20,8 +20,7 @@ class rocketProc extends Component val ctrl = new rocketCtrl(); val dpath = new rocketDpath(); - val dtlb = new rocketDTLB(DTLB_ENTRIES); - val itlb = new rocketITLB(ITLB_ENTRIES); + val dtlb = new rocketTLB(DTLB_ENTRIES); val ptw = new rocketPTW(if (HAVE_VEC) 3 else 2) val arb = new rocketHellaCacheArbiter(DCACHE_PORTS) @@ -59,7 +58,7 @@ class rocketProc extends Component dtlbarb.io.in(DTLB_CPU).valid := ctrl.io.dtlb_val dtlbarb.io.in(DTLB_CPU).bits.kill := ctrl.io.dtlb_kill dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req.bits.cmd - dtlbarb.io.in(DTLB_CPU).bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR + dtlbarb.io.in(DTLB_CPU).bits.asid := UFix(0) dtlbarb.io.in(DTLB_CPU).bits.vpn := dpath.io.dtlb.vpn ctrl.io.dtlb_rdy := dtlbarb.io.in(DTLB_CPU).ready @@ -75,7 +74,7 @@ class rocketProc extends Component dtlb.io.cpu_req.valid := ctrl.io.dtlb_val dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd - dtlb.io.cpu_req.bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR + dtlb.io.cpu_req.bits.asid := UFix(0) dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st @@ -91,7 +90,7 @@ class rocketProc extends Component // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) - ptw.io.requestor(0) <> itlb.io.ptw + ptw.io.requestor(0) <> io.imem.ptw ptw.io.requestor(1) <> dtlb.io.ptw ptw.io.ptbr := dpath.io.ptbr; arb.io.requestor(DMEM_PTW) <> ptw.io.mem @@ -102,20 +101,9 @@ class rocketProc extends Component // FIXME: try to make this more compact - // connect ITLB to I$, ctrl, dpath - itlb.io.cpu.invalidate := dpath.io.ptbr_wen; - itlb.io.cpu.status := dpath.io.ctrl.status; - itlb.io.cpu.req_val := ctrl.io.imem.req_val; - itlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR - itlb.io.cpu.req_vpn := dpath.io.imem.req_addr(VADDR_BITS,PGIDX_BITS); - io.imem.req_idx := dpath.io.imem.req_addr(PGIDX_BITS-1,0); - io.imem.req_ppn := itlb.io.cpu.resp_ppn; - io.imem.req_val := ctrl.io.imem.req_val; - io.imem.invalidate := ctrl.io.dpath.flush_inst; - ctrl.io.imem.resp_val := io.imem.resp_val; - dpath.io.imem.resp_data := io.imem.resp_data; - ctrl.io.xcpt_itlb := itlb.io.cpu.exception; - io.imem.itlb_miss := itlb.io.cpu.resp_miss; + // connect I$ + ctrl.io.imem <> io.imem + dpath.io.imem <> io.imem // connect arbiter to ctrl+dpath+DTLB //TODO: views on nested bundles? @@ -144,22 +132,19 @@ class rocketProc extends Component dpath.io.vec_ctrl <> ctrl.io.vec_dpath // hooking up vector I$ - val vitlb = new rocketITLB(VITLB_ENTRIES) - ptw.io.requestor(2) <> vitlb.io.ptw - vitlb.io.cpu.invalidate := dpath.io.ptbr_wen - vitlb.io.cpu.status := dpath.io.ctrl.status - vitlb.io.cpu.req_val := vu.io.imem_req.valid - vitlb.io.cpu.req_asid := Bits(0,ASID_BITS) // FIXME: connect to PCR - vitlb.io.cpu.req_vpn := vu.io.imem_req.bits(VADDR_BITS,PGIDX_BITS).toUFix - io.vimem.req_idx := vu.io.imem_req.bits(PGIDX_BITS-1,0) - io.vimem.req_ppn := vitlb.io.cpu.resp_ppn - io.vimem.req_val := vu.io.imem_req.valid - io.vimem.invalidate := ctrl.io.dpath.flush_inst - vu.io.imem_req.ready := Bool(true) - vu.io.imem_resp.valid := io.vimem.resp_val - vu.io.imem_resp.bits := io.vimem.resp_data - vu.io.vitlb_exception := vitlb.io.cpu.exception - io.vimem.itlb_miss := vitlb.io.cpu.resp_miss + ptw.io.requestor(2) <> io.vimem.ptw + io.vimem.req.bits.status := dpath.io.ctrl.status + io.vimem.req.bits.pc := vu.io.imem_req.bits.toUFix + io.vimem.req.valid := vu.io.imem_req.valid + io.vimem.req.bits.invalidate := ctrl.io.dpath.flush_inst + io.vimem.req.bits.invalidateTLB := dpath.io.ptbr_wen + vu.io.imem_req.ready := Bool(true) + vu.io.imem_resp.valid := io.vimem.resp.valid + vu.io.imem_resp.bits := io.vimem.resp.bits.data + vu.io.vitlb_exception := io.vimem.resp.bits.xcpt_if + io.vimem.resp.ready := Bool(true) + io.vimem.req.bits.mispredict := Bool(false) + io.vimem.req.bits.taken := Bool(false) // hooking up vector command queues vu.io.vec_cmdq.valid := ctrl.io.vec_iface.vcmdq_valid diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 24304498..f88aca6b 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -6,16 +6,13 @@ import Node._; import Constants._ import Instructions._ import hwacha._ +import ALU._ class ioCtrlDpath extends Bundle() { // outputs to datapath val sel_pc = UFix(OUTPUT, 3); - val wen_btb = Bool(OUTPUT); - val clr_btb = Bool(OUTPUT); - val stallf = Bool(OUTPUT); val stalld = Bool(OUTPUT); - val killf = Bool(OUTPUT); val killd = Bool(OUTPUT); val killx = Bool(OUTPUT); val killm = Bool(OUTPUT); @@ -26,8 +23,10 @@ class ioCtrlDpath extends Bundle() val fn_alu = UFix(OUTPUT, 4); val mul_val = Bool(OUTPUT); val mul_fn = UFix(OUTPUT, 2); + val mul_kill = Bool(OUTPUT) val div_val = Bool(OUTPUT); val div_fn = UFix(OUTPUT, 2); + val div_kill = Bool(OUTPUT) val sel_wa = Bool(OUTPUT); val sel_wb = UFix(OUTPUT, 3); val pcr = UFix(OUTPUT, 3) @@ -37,6 +36,7 @@ class ioCtrlDpath extends Bundle() val ex_fp_val= Bool(OUTPUT); val mem_fp_val= Bool(OUTPUT); val ex_wen = Bool(OUTPUT); + val ex_jalr = Bool(OUTPUT) val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); val wb_valid = Bool(OUTPUT) @@ -48,8 +48,6 @@ class ioCtrlDpath extends Bundle() val badvaddr_wen = Bool(OUTPUT); // high for a load/store access fault val vec_irq_aux_wen = Bool(OUTPUT) // inputs from datapath - val xcpt_ma_inst = Bool(INPUT); // high on a misaligned/illegal virtual PC - val btb_hit = Bool(INPUT); val inst = Bits(INPUT, 32); val br_eq = Bool(INPUT); val br_lt = Bool(INPUT); @@ -76,7 +74,7 @@ class ioCtrlDpath extends Bundle() class ioCtrlAll extends Bundle() { val dpath = new ioCtrlDpath(); - val imem = new ioImem().flip + val imem = new IOCPUFrontend val dmem = new ioHellaCache val dtlb_val = Bool(OUTPUT); val dtlb_kill = Bool(OUTPUT); @@ -84,7 +82,6 @@ class ioCtrlAll extends Bundle() val dtlb_miss = Bool(INPUT); val xcpt_dtlb_ld = Bool(INPUT); val xcpt_dtlb_st = Bool(INPUT); - val xcpt_itlb = Bool(INPUT); val fpu = new ioCtrlFPU(); val vec_dpath = new ioCtrlDpathVec() val vec_iface = new ioCtrlVecInterface() @@ -95,239 +92,239 @@ object rocketCtrlDecode val xpr64 = Y; val decode_default = - // eret - // fp_val renx2 | syscall - // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged - // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,BR_X, X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,MUL_X, X,DIV_X, X,WA_X, WB_X, PCR_X,SYNC_X,X,X,X,X) - - val xdecode = Array( - // eret - // fp_val renx2 | syscall - // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged - // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,BR_NE, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BEQ-> List(Y, N,N,BR_EQ, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BLT-> List(Y, N,N,BR_LT, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BLTU-> List(Y, N,N,BR_LTU,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BGE-> List(Y, N,N,BR_GE, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BGEU-> List(Y, N,N,BR_GEU,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - - J-> List(Y, N,N,BR_J, N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - JAL-> List(Y, N,N,BR_J, N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RA,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_C-> List(Y, N,N,BR_JR, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_J-> List(Y, N,N,BR_JR, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_R-> List(Y, N,N,BR_JR, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - RDNPC-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - - LB-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LH-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LW-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LD-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LBU-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LHU-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LWU-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SB-> List(Y, N,N,BR_N, Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SH-> List(Y, N,N,BR_N, Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SW-> List(Y, N,N,BR_N, Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SD-> List(xpr64,N,N,BR_N, Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - - AMOADD_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOSWAP_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOAND_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOOR_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMIN_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMINU_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAX_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAXU_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOADD_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOAND_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOOR_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAX_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - - LUI-> List(Y, N,N,BR_N, N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTI -> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTIU-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ANDI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ORI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - XORI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADD-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SUB-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLT-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTU-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvAND-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvOR-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvXOR-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLL-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRL-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRA-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - - ADDIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SUBW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - - MUL-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULH-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHSU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - - DIV-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_D, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_DU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REM-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_R, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_RU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_D, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVUW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_DU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_R, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMUW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_RU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - - SYSCALL-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,N), - SETPCR-> List(Y, N,N,BR_N, N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), - CLEARPCR-> List(Y, N,N,BR_N, N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_C,SYNC_N,N,N,Y,Y), - ERET-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,Y,N), - FENCE-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), - FENCE_I-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,Y), - CFLUSH-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,Y), - MFPCR-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_F,SYNC_N,N,N,Y,Y), - MTPCR-> List(Y, N,N,BR_N, Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_T,SYNC_N,N,N,Y,Y), - RDTIME-> List(Y, N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), - RDCYCLE-> List(Y, N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), - RDINSTRET-> List(Y, N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N)) - - val fdecode = Array( - // eret - // fp_val renx2 | syscall - // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged - // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJ_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJ_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJX_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJX_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJN_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJN_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMIN_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMIN_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMAX_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMAX_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FADD_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FADD_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMUL_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMUL_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFTX_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFTX_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_W_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_W_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_WU_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_WU_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_L_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_L_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_LU_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_LU_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FEQ_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FEQ_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLT_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLT_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLE_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLE_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MXTF_S-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MXTF_D-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_W-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_W-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_WU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_WU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_L-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_L-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_LU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_LU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFFSR-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MTFSR-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLW-> List(FPU_Y,Y,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FLD-> List(FPU_Y,Y,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FSW-> List(FPU_Y,Y,N,BR_N, N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FSD-> List(FPU_Y,Y,N,BR_N, N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N)) - - val vdecode = Array( - // eret - // fp_val renx2 | syscall - // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged - // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - VVCFGIVL-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VVCFG-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VSETVL-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VF-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VMVV-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - VMSV-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFMVV-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FENCE_V_L-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FENCE_V_G-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), - VLD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLW-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLWU-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLH-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLHU-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLB-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLBU-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSW-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSH-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSB-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLW-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSW-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTWU-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTH-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTHU-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTB-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTBU-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTH-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTB-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - - VENQCMD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQIMM1-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQIMM2-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQCNT-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VXCPTEVAC-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VXCPTKILL-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N), - VXCPTHOLD-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N)) + // jalr eret + // fp_val | renx2 div_val | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,BR_X, X,X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,MUL_X, X,X,WA_X, WB_X, PCR_X,SYNC_X,X,X,X,X) + + val xdecode = Array( + // jalr eret + // fp_val | renx2 div_val | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + + J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RA,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + RDNPC-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + + LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LW-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LBU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LHU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SB-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SH-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SW-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + + AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + + LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + XORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvAND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvXOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + + ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + + MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + + DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REM-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + + SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,N), + SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), + CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_C,SYNC_N,N,N,Y,Y), + ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,Y,N), + FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), + FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,Y), + CFLUSH-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,Y), + MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_F,SYNC_N,N,N,Y,Y), + MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_T,SYNC_N,N,N,Y,Y), + RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), + RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), + RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N)) + + val fdecode = Array( + // jalr eret + // fp_val | renx2 div_val | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMIN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMIN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMAX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMAX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMUL_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMUL_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MFTX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MFTX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_W_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_W_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_WU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_WU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_L_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_L_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_LU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_LU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FEQ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FEQ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLT_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLT_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLE_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLE_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MXTF_S-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MXTF_D-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MFFSR-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MTFSR-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FLD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FSW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FSD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N)) + + val vdecode = Array( + // jalr eret + // fp_val | renx2 div_val | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + VVCFGIVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VVCFG-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VSETVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VF-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + VMSV-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FENCE_V_L-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FENCE_V_G-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), + VLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLWU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLHU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLBU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTWU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTHU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTBU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + + VENQCMD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQIMM1-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQIMM2-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQCNT-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VXCPTEVAC-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VXCPTKILL-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N), + VXCPTHOLD-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N)) } class rocketCtrl extends Component @@ -340,32 +337,22 @@ class rocketCtrl extends Component val cs = DecodeLogic(io.dpath.inst, rocketCtrlDecode.decode_default, decode_table) - val id_int_val :: id_fp_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs - val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 + val id_int_val :: id_fp_val :: id_vec_val :: id_br_type :: id_jalr :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs + val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 val id_pcr :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 - val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst, resetVal = Bool(false)); - val id_raddr3 = io.dpath.inst(16,12); val id_raddr2 = io.dpath.inst(21,17); val id_raddr1 = io.dpath.inst(26,22); val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27)); - - val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) - val wb_reg_dcache_miss = Reg(io.dmem.resp.bits.miss || io.dmem.resp.bits.nack, resetVal = Bool(false)); - - val id_reg_valid = Reg(resetVal = Bool(false)); - val id_reg_btb_hit = Reg(resetVal = Bool(false)); - val id_reg_xcpt_itlb = Reg(resetVal = Bool(false)); - val id_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); - val id_reg_icmiss = Reg(resetVal = Bool(false)); - val id_reg_replay = Reg(resetVal = Bool(false)); - val id_load_use = Bool(); + val id_load_use = Bool(); val ex_reg_br_type = Reg(){Bits()} + val ex_reg_jalr = Reg(){Bool()} val ex_reg_btb_hit = Reg(){Bool()}; val ex_reg_div_val = Reg(){Bool()}; val ex_reg_mul_val = Reg(){Bool()}; + val ex_reg_mul_fn = Reg(){UFix()}; val ex_reg_mem_val = Reg(){Bool()}; val ex_reg_mem_cmd = Reg(){Bits()}; val ex_reg_mem_type = Reg(){UFix(width = 3)}; @@ -385,7 +372,7 @@ class rocketCtrl extends Component val ex_reg_fp_val = Reg(resetVal = Bool(false)); val ex_reg_fp_sboard_set = Reg(resetVal = Bool(false)); val ex_reg_vec_val = Reg(resetVal = Bool(false)); - val ex_reg_replay = Reg(resetVal = Bool(false)); + val ex_reg_replay_next = Reg(resetVal = Bool(false)); val ex_reg_load_use = Reg(resetVal = Bool(false)); val mem_reg_valid = Reg(resetVal = Bool(false)); @@ -404,6 +391,7 @@ class rocketCtrl extends Component val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val mem_reg_fp_val = Reg(resetVal = Bool(false)); val mem_reg_replay = Reg(resetVal = Bool(false)); + val mem_reg_replay_next = Reg(resetVal = Bool(false)); val mem_reg_kill = Reg(resetVal = Bool(false)); val mem_reg_fp_sboard_set = Reg(resetVal = Bool(false)); @@ -415,30 +403,16 @@ class rocketCtrl extends Component val wb_reg_eret = Reg(resetVal = Bool(false)); val wb_reg_exception = Reg(resetVal = Bool(false)); val wb_reg_replay = Reg(resetVal = Bool(false)); + val wb_reg_replay_next = Reg(resetVal = Bool(false)); val wb_reg_cause = Reg(){UFix()}; val wb_reg_fp_val = Reg(resetVal = Bool(false)); val wb_reg_fp_sboard_set = Reg(resetVal = Bool(false)); + val wb_reg_dcache_miss = Reg(io.dmem.resp.bits.miss || io.dmem.resp.bits.nack, resetVal = Bool(false)); + val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) val take_pc = Bool() val take_pc_wb = Bool() - - when (!io.dpath.stalld) { - when (io.dpath.killf) { - id_reg_valid := Bool(false) - id_reg_btb_hit := Bool(false); - id_reg_xcpt_ma_inst := Bool(false); - id_reg_xcpt_itlb := Bool(false); - id_reg_replay := !take_pc; // replay on I$ miss - } - .otherwise{ - id_reg_valid := Bool(true) - id_reg_btb_hit := io.dpath.btb_hit; - id_reg_xcpt_ma_inst := if_reg_xcpt_ma_inst; - id_reg_xcpt_itlb := io.xcpt_itlb; - id_reg_replay := id_replay_next - } - id_reg_icmiss := !io.imem.resp_val; - } + val ctrl_killm = Bool() var vec_replay = Bool(false) var vec_stalld = Bool(false) @@ -499,6 +473,7 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killd) { ex_reg_br_type := BR_N; + ex_reg_jalr := Bool(false) ex_reg_btb_hit := Bool(false); ex_reg_div_val := Bool(false); ex_reg_mul_val := Bool(false); @@ -517,55 +492,50 @@ class rocketCtrl extends Component ex_reg_fp_val := Bool(false); ex_reg_fp_sboard_set := Bool(false); ex_reg_vec_val := Bool(false); - ex_reg_replay := Bool(false); + ex_reg_replay_next := Bool(false); ex_reg_load_use := Bool(false); } .otherwise { ex_reg_br_type := id_br_type; - ex_reg_btb_hit := id_reg_btb_hit; + ex_reg_jalr := id_jalr + ex_reg_btb_hit := io.imem.resp.bits.taken ex_reg_div_val := id_div_val.toBool && id_waddr != UFix(0); ex_reg_mul_val := id_mul_val.toBool && id_waddr != UFix(0); + ex_reg_mul_fn := id_mul_fn.toUFix ex_reg_mem_val := id_mem_val.toBool; - ex_reg_valid := id_reg_valid + ex_reg_valid := Bool(true) ex_reg_pcr := id_pcr ex_reg_wen := id_wen.toBool && id_waddr != UFix(0); ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen ex_reg_eret := id_eret.toBool; ex_reg_flush_inst := (id_sync === SYNC_I); - ex_reg_xcpt_ma_inst := id_reg_xcpt_ma_inst; - ex_reg_xcpt_itlb := id_reg_xcpt_itlb; + ex_reg_xcpt_ma_inst := io.imem.resp.bits.xcpt_ma + ex_reg_xcpt_itlb := io.imem.resp.bits.xcpt_if ex_reg_xcpt_illegal := illegal_inst; ex_reg_xcpt_privileged := (id_privileged & ~io.dpath.status(SR_S)).toBool; ex_reg_xcpt_syscall := id_syscall.toBool; ex_reg_fp_val := id_fp_val ex_reg_fp_sboard_set := io.fpu.dec.sboard ex_reg_vec_val := id_vec_val.toBool - ex_reg_replay := id_reg_replay + ex_reg_replay_next := id_replay_next ex_reg_load_use := id_load_use; } + ex_reg_xcpt_interrupt := !take_pc && id_interrupt ex_reg_mem_cmd := id_mem_cmd ex_reg_mem_type := id_mem_type.toUFix - ex_reg_xcpt_interrupt := id_reg_valid && id_interrupt && !take_pc ex_reg_cause := id_cause - val beq = io.dpath.br_eq; - val bne = ~io.dpath.br_eq; - val blt = io.dpath.br_lt; - val bltu = io.dpath.br_ltu; - val bge = ~io.dpath.br_lt; - val bgeu = ~io.dpath.br_ltu; - - val br_taken = !(wb_reg_dcache_miss && ex_reg_load_use) && - ((ex_reg_br_type === BR_EQ) && beq || - (ex_reg_br_type === BR_NE) && bne || - (ex_reg_br_type === BR_LT) && blt || - (ex_reg_br_type === BR_LTU) && bltu || - (ex_reg_br_type === BR_GE) && bge || - (ex_reg_br_type === BR_GEU) && bgeu || - (ex_reg_br_type === BR_J)) // treat J/JAL like taken branches - val jr_taken = !(wb_reg_dcache_miss && ex_reg_load_use) && ex_reg_br_type === BR_JR + val br_taken = + Mux(ex_reg_br_type === BR_EQ, io.dpath.br_eq, + Mux(ex_reg_br_type === BR_NE, ~io.dpath.br_eq, + Mux(ex_reg_br_type === BR_LT, io.dpath.br_lt, + Mux(ex_reg_br_type === BR_GE, ~io.dpath.br_lt, + Mux(ex_reg_br_type === BR_LTU, io.dpath.br_ltu, + Mux(ex_reg_br_type === BR_GEU, ~io.dpath.br_ltu, + ex_reg_br_type === BR_J)))))) - val mem_reg_div_mul_val = Reg(){Bool()}; + val mem_reg_div_val = Reg(){Bool()} + val mem_reg_mul_val = Reg(){Bool()} val mem_reg_eret = Reg(){Bool()}; val mem_reg_mem_val = Reg(){Bool()}; val mem_reg_mem_cmd = Reg(){Bits()} @@ -574,7 +544,8 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killx) { mem_reg_valid := Bool(false); mem_reg_pcr := PCR_N - mem_reg_div_mul_val := Bool(false); + mem_reg_div_val := Bool(false) + mem_reg_mul_val := Bool(false) mem_reg_wen := Bool(false); mem_reg_fp_wen := Bool(false); mem_reg_eret := Bool(false); @@ -589,11 +560,13 @@ class rocketCtrl extends Component mem_reg_xcpt_syscall := Bool(false); mem_reg_fp_val := Bool(false); mem_reg_fp_sboard_set := Bool(false) + mem_reg_replay_next := Bool(false) } .otherwise { mem_reg_valid := ex_reg_valid mem_reg_pcr := ex_reg_pcr - mem_reg_div_mul_val := ex_reg_div_val || ex_reg_mul_val; + mem_reg_div_val := ex_reg_div_val && io.dpath.div_rdy + mem_reg_mul_val := ex_reg_mul_val && io.dpath.mul_rdy mem_reg_wen := ex_reg_wen; mem_reg_fp_wen := ex_reg_fp_wen; mem_reg_eret := ex_reg_eret; @@ -608,13 +581,14 @@ class rocketCtrl extends Component mem_reg_xcpt_syscall := ex_reg_xcpt_syscall; mem_reg_fp_val := ex_reg_fp_val mem_reg_fp_sboard_set := ex_reg_fp_sboard_set + mem_reg_replay_next := ex_reg_replay_next } mem_reg_mem_cmd := ex_reg_mem_cmd; mem_reg_mem_type := ex_reg_mem_type; mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb mem_reg_cause := ex_reg_cause - when (io.dpath.killm) { + when (ctrl_killm) { wb_reg_valid := Bool(false) wb_reg_pcr := PCR_N wb_reg_wen := Bool(false); @@ -624,17 +598,19 @@ class rocketCtrl extends Component wb_reg_div_mul_val := Bool(false); wb_reg_fp_val := Bool(false) wb_reg_fp_sboard_set := Bool(false) + wb_reg_replay_next := Bool(false) } .otherwise { wb_reg_valid := mem_reg_valid wb_reg_pcr := mem_reg_pcr wb_reg_wen := mem_reg_wen; wb_reg_fp_wen := mem_reg_fp_wen; - wb_reg_eret := mem_reg_eret; + wb_reg_eret := mem_reg_eret && !mem_reg_replay wb_reg_flush_inst := mem_reg_flush_inst; - wb_reg_div_mul_val := mem_reg_div_mul_val; + wb_reg_div_mul_val := mem_reg_div_val || mem_reg_mul_val wb_reg_fp_val := mem_reg_fp_val wb_reg_fp_sboard_set := mem_reg_fp_sboard_set + wb_reg_replay_next := mem_reg_replay_next } val sboard = new rocketCtrlSboard(32, 3, 2); @@ -717,7 +693,7 @@ class rocketCtrl extends Component UFix(0,5)))))))))))); // instruction address misaligned // control transfer from ex/mem - val take_pc_ex = ex_reg_btb_hit != br_taken || jr_taken + val take_pc_ex = ex_reg_btb_hit != br_taken || ex_reg_jalr take_pc_wb := wb_reg_replay || vec_replay || wb_reg_exception || wb_reg_eret take_pc := take_pc_ex || take_pc_wb; @@ -726,22 +702,23 @@ class rocketCtrl extends Component val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack) val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem - val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill || fpu_kill_mem - val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill + val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill + ctrl_killm := killm_common || dmem_kill_mem || fpu_kill_mem // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || - ex_reg_replay || ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || + ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || ex_reg_div_val && !io.dpath.div_rdy || - ex_reg_mul_val && !io.dpath.mul_rdy + ex_reg_mul_val && !io.dpath.mul_rdy || + mem_reg_replay_next val kill_ex = take_pc_wb || replay_ex mem_reg_replay := replay_ex && !take_pc_wb; mem_reg_kill := kill_ex; wb_reg_replay := replay_mem && !take_pc_wb - wb_reg_exception := mem_exception && !take_pc_wb; + wb_reg_exception := mem_exception && !take_pc_wb && !wb_reg_replay_next wb_reg_cause := mem_cause; val replay_wb = wb_reg_replay || vec_replay || io.dpath.pcr_replay @@ -755,19 +732,16 @@ class rocketCtrl extends Component io.dpath.vec_irq_aux_wen := wb_reg_exception && wb_reg_cause >= UFix(24) && wb_reg_cause < UFix(32) io.dpath.sel_pc := - Mux(wb_reg_exception, PC_PCR, // exception - Mux(replay_wb, PC_WB, // replay - Mux(wb_reg_eret, PC_PCR, // eret instruction - Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch - Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch - Mux(jr_taken, PC_JR, // taken JALR - Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB - PC_4))))))); // PC+4 + Mux(wb_reg_exception, PC_PCR, // exception + Mux(wb_reg_eret, PC_PCR, // eret instruction + Mux(replay_wb, PC_WB, // replay + Mux(ex_reg_jalr, PC_EX, // JALR + Mux(!ex_reg_btb_hit, PC_EX, // mispredicted taken branch + PC_EX4))))) // mispredicted not taken branch - io.dpath.wen_btb := !ex_reg_btb_hit && br_taken - io.dpath.clr_btb := ex_reg_btb_hit && !br_taken || id_reg_icmiss; - - io.imem.req_val := !reset.toBool && (take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay)) + io.imem.req.bits.mispredict := !take_pc_wb && !ex_reg_jalr && ex_reg_btb_hit != br_taken + io.imem.req.bits.taken := !ex_reg_btb_hit + io.imem.req.valid := take_pc // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. val data_hazard_ex = ex_reg_wen && @@ -795,7 +769,7 @@ class rocketCtrl extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) || + val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_val || mem_reg_mul_val || mem_reg_fp_val) || fp_data_hazard_mem && mem_reg_fp_val id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) @@ -812,28 +786,24 @@ class rocketCtrl extends Component val id_wb_hazard = data_hazard_wb && (wb_reg_dcache_miss || wb_reg_div_mul_val) || fp_data_hazard_wb && (wb_reg_dcache_miss || wb_reg_fp_val) + val killd_common = take_pc || id_interrupt || ex_reg_replay_next + val ctrl_killd = killd_common || !io.imem.resp.valid + val ctrl_stalld = - !take_pc && - ( - id_ex_hazard || id_mem_hazard || id_wb_hazard || - id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || - id_fp_val && id_stall_fpu || - id_mem_val.toBool && !(io.dmem.req.ready && io.dtlb_rdy) || - ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req.ready || - vec_stalld - ); - val ctrl_stallf = ctrl_stalld; - - val ctrl_killd = take_pc || ctrl_stalld; - val ctrl_killf = take_pc || !io.imem.resp_val; + id_ex_hazard || id_mem_hazard || id_wb_hazard || + id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || + id_fp_val && id_stall_fpu || + id_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || + (id_sync === SYNC_D || id_sync === SYNC_I) && !io.dmem.req.ready || + vec_stalld io.dpath.flush_inst := wb_reg_flush_inst; - io.dpath.stallf := ctrl_stallf; - io.dpath.stalld := ctrl_stalld; - io.dpath.killf := ctrl_killf; - io.dpath.killd := ctrl_killd; + io.dpath.stalld := !ctrl_killd && ctrl_stalld; + io.dpath.killd := ctrl_killd || ctrl_stalld io.dpath.killx := kill_ex; - io.dpath.killm := kill_mem; + io.dpath.killm := killm_common + io.imem.resp.ready := killd_common || !ctrl_stalld + io.imem.req.bits.invalidate := wb_reg_flush_inst io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen io.dpath.ren2 := id_renx2.toBool; @@ -841,12 +811,15 @@ class rocketCtrl extends Component io.dpath.sel_alu2 := id_sel_alu2.toUFix io.dpath.fn_dw := id_fn_dw.toBool; io.dpath.fn_alu := id_fn_alu.toUFix - io.dpath.div_fn := id_div_fn.toUFix - io.dpath.div_val := id_div_val.toBool && id_waddr != UFix(0); - io.dpath.mul_fn := id_mul_fn.toUFix - io.dpath.mul_val := id_mul_val.toBool && id_waddr != UFix(0); + io.dpath.div_fn := ex_reg_mul_fn + io.dpath.div_val := ex_reg_div_val + io.dpath.div_kill := mem_reg_div_val && killm_common + io.dpath.mul_fn := ex_reg_mul_fn + io.dpath.mul_val := ex_reg_mul_val + io.dpath.mul_kill := mem_reg_mul_val && killm_common io.dpath.ex_fp_val:= ex_reg_fp_val; io.dpath.mem_fp_val:= mem_reg_fp_val; + io.dpath.ex_jalr := ex_reg_jalr io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; io.dpath.wb_wen := wb_reg_wen; @@ -860,12 +833,12 @@ class rocketCtrl extends Component io.fpu.valid := !io.dpath.killd && id_fp_val io.fpu.killx := kill_ex - io.fpu.killm := kill_mem + io.fpu.killm := killm_common io.dtlb_val := ex_reg_mem_val io.dtlb_kill := mem_reg_kill io.dmem.req.valid := ex_reg_mem_val - io.dmem.req.bits.kill := kill_dcache + io.dmem.req.bits.kill := killm_common || io.dtlb_miss io.dmem.req.bits.cmd := ex_reg_mem_cmd io.dmem.req.bits.typ := ex_reg_mem_type } diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 41268264..c3c15522 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -4,8 +4,10 @@ import Chisel._ import Node._ import Constants._ -class rocketDivider(w: Int, earlyOut: Boolean = false) extends Component { +class rocketDivider(earlyOut: Boolean = false) extends Component { val io = new ioMultiplier + val w0 = io.req.bits.in0.getWidth + val w = w0+1 // sign bit val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_ready); @@ -26,57 +28,16 @@ class rocketDivider(w: Int, earlyOut: Boolean = false) extends Component { val fn = io.req.bits.fn(io.req.bits.fn.width-2,0) val tc = (fn === DIV_D) || (fn === DIV_R); - switch (state) { - is (s_ready) { - when (io.req.valid) { - state := Mux(tc, s_neg_inputs, s_busy) - } - } - is (s_neg_inputs) { - state := Mux(io.req_kill, s_ready, s_busy) - } - is (s_busy) { - when (io.req_kill && Reg(io.req.ready)) { - state := s_ready - } - .elsewhen (count === UFix(w)) { - state := Mux(neg_quo || neg_rem, s_neg_outputs, s_done) - } - } - is (s_neg_outputs) { - state := s_done - } - is (s_done) { - when (io.resp_rdy) { - state := s_ready - } - } - } - - // state machine + val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(w0-1), io.req.bits.in0(w0/2-1)) + val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(w0-1,w0/2), Fill(w0/2, lhs_sign)) + val lhs_in = Cat(lhs_sign, lhs_hi, io.req.bits.in0(w0/2-1,0)) - val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(w-1), io.req.bits.in0(w/2-1)) - val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(w-1,w/2), Fill(w/2, lhs_sign)) - val lhs_in = Cat(lhs_hi, io.req.bits.in0(w/2-1,0)) - - val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w-1), io.req.bits.in1(w/2-1)) - val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(w-1,w/2), Fill(w/2, rhs_sign)) - val rhs_in = Cat(rhs_hi, io.req.bits.in1(w/2-1,0)) + val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w0-1), io.req.bits.in1(w0/2-1)) + val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(w0-1,w0/2), Fill(w0/2, rhs_sign)) + val rhs_in = Cat(rhs_sign, rhs_hi, io.req.bits.in1(w0/2-1,0)) - when (io.req.fire()) { - count := UFix(0) - half := (dw === DW_32); - neg_quo := Bool(false); - neg_rem := Bool(false); - rem := (fn === DIV_R) || (fn === DIV_RU); - reg_tag := io.req_tag; - divby0 := Bool(true); - divisor := rhs_in - remainder := lhs_in - } when (state === s_neg_inputs) { - neg_rem := remainder(w-1) - neg_quo := (remainder(w-1) != divisor(w-1)) + state := s_busy when (remainder(w-1)) { remainder := Cat(remainder(2*w, w), -remainder(w-1,0)) } @@ -85,6 +46,7 @@ class rocketDivider(w: Int, earlyOut: Boolean = false) extends Component { } } when (state === s_neg_outputs) { + state := s_done when (neg_rem && neg_quo && !divby0) { remainder := Cat(-remainder(2*w, w+1), remainder(w), -remainder(w-1,0)) } @@ -96,6 +58,9 @@ class rocketDivider(w: Int, earlyOut: Boolean = false) extends Component { } } when (state === s_busy) { + when (count === UFix(w)) { + state := Mux(neg_quo || neg_rem, s_neg_outputs, s_done) + } count := count + UFix(1) val msb = subtractor(w) @@ -112,11 +77,26 @@ class rocketDivider(w: Int, earlyOut: Boolean = false) extends Component { remainder := remainder << shift count := shift } - } + } + when (state === s_done && io.resp_rdy || io.req_kill) { + state := s_ready + } + when (io.req.fire()) { + state := Mux(lhs_sign || rhs_sign, s_neg_inputs, s_busy) + count := UFix(0) + half := (dw === DW_32); + neg_quo := lhs_sign != rhs_sign + neg_rem := lhs_sign + rem := (fn === DIV_R) || (fn === DIV_RU); + reg_tag := io.req_tag; + divby0 := Bool(true); + divisor := rhs_in + remainder := lhs_in + } - val result = Mux(rem, remainder(2*w, w+1), remainder(w-1,0)) + val result = Mux(rem, remainder(w+w0, w+1), remainder(w0-1,0)) - io.resp_bits := Mux(half, Cat(Fill(w/2, result(w/2-1)), result(w/2-1,0)), result) + io.resp_bits := Mux(half, Cat(Fill(w0/2, result(w0/2-1)), result(w0/2-1,0)), result) io.resp_tag := reg_tag io.resp_val := state === s_done io.req.ready := state === s_ready diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f052f6f1..24f70519 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -7,19 +7,13 @@ import Constants._ import Instructions._ import hwacha._ -class ioDpathImem extends Bundle() -{ - val req_addr = UFix(OUTPUT, VADDR_BITS+1); - val resp_data = Bits(INPUT, 32); -} - class ioDpathAll extends Bundle() { val host = new ioHTIF(); val ctrl = new ioCtrlDpath().flip val dmem = new ioHellaCache val dtlb = new ioDTLB_CPU_req_bundle().asOutput() - val imem = new ioDpathImem(); + val imem = new IOCPUFrontend val ptbr_wen = Bool(OUTPUT); val ptbr = UFix(OUTPUT, PADDR_BITS); val fpu = new ioDpathFPU(); @@ -32,27 +26,16 @@ class ioDpathAll extends Bundle() class rocketDpath extends Component { val io = new ioDpathAll(); - - val btb = new rocketDpathBTB(4); // # of entries in BTB - - val if_btb_target = btb.io.target; val pcr = new rocketDpathPCR(); val ex_pcr = pcr.io.r.data; - val alu = new rocketDpathALU(); + val alu = new ALU val ex_alu_out = alu.io.out; val ex_alu_adder_out = alu.io.adder_out; val rfile = new rocketDpathRegfile(); - // instruction fetch definitions - val if_reg_pc = Reg(resetVal = UFix(START_ADDR,VADDR_BITS+1)); - - // instruction decode definitions - val id_reg_inst = Reg(resetVal = NOP); - val id_reg_pc = Reg() { UFix(width = VADDR_BITS+1) }; - // execute definitions val ex_reg_pc = Reg() { UFix() }; val ex_reg_inst = Reg() { Bits() }; @@ -62,13 +45,8 @@ class rocketDpath extends Component val ex_reg_rs2 = Reg() { Bits() }; val ex_reg_rs1 = Reg() { Bits() }; val ex_reg_waddr = Reg() { UFix() }; - val ex_reg_ctrl_eret = Reg(resetVal = Bool(false)); val ex_reg_ctrl_fn_dw = Reg() { UFix() }; val ex_reg_ctrl_fn_alu = Reg() { UFix() }; - val ex_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); - val ex_reg_ctrl_mul_fn = Reg() { UFix() }; - val ex_reg_ctrl_div_val = Reg(resetVal = Bool(false)); - val ex_reg_ctrl_div_fn = Reg() { UFix() }; val ex_reg_ctrl_sel_wb = Reg() { UFix() }; val ex_wdata = Bits(); @@ -99,9 +77,6 @@ class rocketDpath extends Component val r_dmem_resp_replay = Reg(resetVal = Bool(false)); val r_dmem_fp_replay = Reg(resetVal = Bool(false)); val r_dmem_resp_waddr = Reg() { UFix() }; - - // instruction fetch stage - val if_pc_plus4 = if_reg_pc + UFix(4); val ex_pc_plus4 = ex_reg_pc + UFix(4); val ex_branch_target = ex_reg_pc + Cat(ex_reg_op2(VADDR_BITS-1,0), Bits(0,1)).toUFix @@ -109,41 +84,24 @@ class rocketDpath extends Component val ex_ea_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0)) val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix - val if_next_pc = - Mux(io.ctrl.sel_pc === PC_BTB, Cat(if_btb_target(VADDR_BITS-1), if_btb_target), - Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, - Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, - Mux(io.ctrl.sel_pc === PC_JR, ex_effective_address, - Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec), - Mux(io.ctrl.sel_pc === PC_WB, wb_reg_pc, - if_pc_plus4)))))) // PC_4 - - when (!io.ctrl.stallf) { - if_reg_pc := if_next_pc.toUFix; - } - - io.ctrl.xcpt_ma_inst := if_next_pc(1,0) != Bits(0) - - io.imem.req_addr := - Mux(io.ctrl.stallf, if_reg_pc, - if_next_pc.toUFix); - - btb.io.current_pc := if_reg_pc; - btb.io.hit <> io.ctrl.btb_hit; - btb.io.wen <> io.ctrl.wen_btb; - btb.io.clr <> io.ctrl.clr_btb; - btb.io.correct_pc := ex_reg_pc; - btb.io.correct_target := ex_branch_target - btb.io.invalidate := io.ctrl.flush_inst + // hook up I$ + io.imem.req.bits.invalidateTLB := pcr.io.ptbr_wen + io.imem.req.bits.currentpc := ex_reg_pc + io.imem.req.bits.status := pcr.io.status + io.imem.req.bits.pc := + Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, + Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), + Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec).toUFix, + wb_reg_pc))) // PC_WB // instruction decode stage - when (!io.ctrl.stalld) { - id_reg_pc := if_reg_pc; - id_reg_inst := Mux(io.ctrl.killf, NOP, io.imem.resp_data) - } + val id_inst = io.imem.resp.bits.data + val id_pc = io.imem.resp.bits.pc + debug(id_inst) + debug(id_pc) - val id_raddr1 = id_reg_inst(26,22).toUFix; - val id_raddr2 = id_reg_inst(21,17).toUFix; + val id_raddr1 = id_inst(26,22).toUFix; + val id_raddr2 = id_inst(21,17).toUFix; // regfile read rfile.io.r0.en <> io.ctrl.ren2; @@ -156,7 +114,7 @@ class rocketDpath extends Component // destination register selection val id_waddr = - Mux(io.ctrl.sel_wa === WA_RD, id_reg_inst(31,27).toUFix, + Mux(io.ctrl.sel_wa === WA_RD, id_inst(31,27).toUFix, RA); // WA_RA // bypass muxes @@ -185,26 +143,26 @@ class rocketDpath extends Component val id_imm_l = io.ctrl.sel_alu2 === A2_LTYPE val id_imm_zero = io.ctrl.sel_alu2 === A2_ZERO || io.ctrl.sel_alu2 === A2_RTYPE val id_imm_ibz = io.ctrl.sel_alu2 === A2_ITYPE || io.ctrl.sel_alu2 === A2_BTYPE || id_imm_zero - val id_imm_sign = Mux(id_imm_bj, id_reg_inst(31), - Mux(id_imm_l, id_reg_inst(26), + val id_imm_sign = Mux(id_imm_bj, id_inst(31), + Mux(id_imm_l, id_inst(26), Mux(id_imm_zero, Bits(0,1), - id_reg_inst(21)))) // IMM_ITYPE + id_inst(21)))) // IMM_ITYPE val id_imm_small = Mux(id_imm_zero, Bits(0,12), - Cat(Mux(id_imm_bj, id_reg_inst(31,27), id_reg_inst(21,17)), id_reg_inst(16,10))) + Cat(Mux(id_imm_bj, id_inst(31,27), id_inst(21,17)), id_inst(16,10))) val id_imm = Cat(Fill(32, id_imm_sign), - Mux(id_imm_l, Cat(id_reg_inst(26,7), Bits(0,12)), + Mux(id_imm_l, Cat(id_inst(26,7), Bits(0,12)), Mux(id_imm_ibz, Cat(Fill(20, id_imm_sign), id_imm_small), - Cat(Fill(7, id_imm_sign), id_reg_inst(31,7))))) // A2_JTYPE + Cat(Fill(7, id_imm_sign), id_inst(31,7))))) // A2_JTYPE val id_op2_dmem_bypass = id_rs2_dmem_bypass && io.ctrl.sel_alu2 === A2_RTYPE val id_op2 = Mux(io.ctrl.sel_alu2 === A2_RTYPE, id_rs2, id_imm) - io.ctrl.inst := id_reg_inst - io.fpu.inst := id_reg_inst + io.ctrl.inst := id_inst + io.fpu.inst := id_inst // execute stage - ex_reg_pc := id_reg_pc; - ex_reg_inst := id_reg_inst + ex_reg_pc := id_pc + ex_reg_inst := id_inst ex_reg_raddr1 := id_raddr1 ex_reg_raddr2 := id_raddr2; ex_reg_op2 := id_op2; @@ -213,21 +171,8 @@ class rocketDpath extends Component ex_reg_waddr := id_waddr; ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUFix; ex_reg_ctrl_fn_alu := io.ctrl.fn_alu; - ex_reg_ctrl_mul_fn := io.ctrl.mul_fn; - ex_reg_ctrl_div_fn := io.ctrl.div_fn; ex_reg_ctrl_sel_wb := io.ctrl.sel_wb; - when(io.ctrl.killd) { - ex_reg_ctrl_div_val := Bool(false); - ex_reg_ctrl_mul_val := Bool(false); - ex_reg_ctrl_eret := Bool(false); - } - .otherwise { - ex_reg_ctrl_div_val := io.ctrl.div_val; - ex_reg_ctrl_mul_val := io.ctrl.mul_val; - ex_reg_ctrl_eret := io.ctrl.id_eret; - } - val ex_rs1 = Mux(Reg(id_rs1_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs1) val ex_rs2 = Mux(Reg(id_rs2_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs2) val ex_op2 = Mux(Reg(id_op2_dmem_bypass), wb_reg_dmem_wdata, ex_reg_op2) @@ -240,19 +185,19 @@ class rocketDpath extends Component io.fpu.fromint_data := ex_rs1 // divider - val div = new rocketDivider(64) - div.io.req.valid := ex_reg_ctrl_div_val - div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_div_fn) + val div = new rocketDivider(earlyOut = true) + div.io.req.valid := io.ctrl.div_val + div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, io.ctrl.div_fn) div.io.req.bits.in0 := ex_rs1 div.io.req.bits.in1 := ex_rs2 div.io.req_tag := ex_reg_waddr - div.io.req_kill := io.ctrl.killm + div.io.req_kill := io.ctrl.div_kill div.io.resp_rdy := !dmem_resp_replay io.ctrl.div_rdy := div.io.req.ready io.ctrl.div_result_val := div.io.resp_val // multiplier - var mul_io = new rocketMultiplier(unroll = 6).io + var mul_io = new rocketMultiplier(unroll = 4, earlyOut = true).io if (HAVE_VEC) { val vu_mul = new rocketVUMultiplier(nwbq = 1) @@ -260,12 +205,12 @@ class rocketDpath extends Component vu_mul.io.vu.resp <> io.vec_imul_resp mul_io = vu_mul.io.cpu } - mul_io.req.valid := ex_reg_ctrl_mul_val; - mul_io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_mul_fn) + mul_io.req.valid := io.ctrl.mul_val + mul_io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, io.ctrl.mul_fn) mul_io.req.bits.in0 := ex_rs1 mul_io.req.bits.in1 := ex_rs2 mul_io.req_tag := ex_reg_waddr - mul_io.req_kill := io.ctrl.killm + mul_io.req_kill := io.ctrl.mul_kill mul_io.resp_rdy := !dmem_resp_replay && !div.io.resp_val io.ctrl.mul_rdy := mul_io.req.ready io.ctrl.mul_result_val := mul_io.resp_val diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 25fc2956..fc4b7a03 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -15,27 +15,45 @@ class ioALU extends Bundle(){ val adder_out = UFix(OUTPUT, 64); } -class rocketDpathALU extends Component +object ALU { + val FN_X = Bits("b????") + val FN_ADD = UFix(0) + val FN_SL = UFix(1) + val FN_XOR = UFix(4) + val FN_OR = UFix(6) + val FN_AND = UFix(7) + val FN_SR = UFix(5) + val FN_SUB = UFix(8) + val FN_SLT = UFix(10) + val FN_SLTU = UFix(11) + val FN_SRA = UFix(13) + val FN_OP2 = UFix(15) + + def isSub(cmd: Bits) = cmd(3) + def isSLTU(cmd: Bits) = cmd(0) +} + +class ALU extends Component +{ + import ALU._ val io = new ioALU(); // ADD, SUB - val sub = (io.fn === FN_SUB) || (io.fn === FN_SLT) || (io.fn === FN_SLTU) + val sub = isSub(io.fn) val adder_rhs = Mux(sub, ~io.in2, io.in2) val sum = (io.in1 + adder_rhs + sub.toUFix)(63,0) // SLT, SLTU val less = Mux(io.in1(63) === io.in2(63), sum(63), - Mux(io.fn === FN_SLT, io.in1(63), io.in2(63))) + Mux(isSLTU(io.fn), io.in2(63), io.in1(63))) // SLL, SRL, SRA - val sra = (io.fn === FN_SRA) val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0)).toUFix - val shright = sra || (io.fn === FN_SR) - val shin_hi_32 = Mux(sra, Fill(32, io.in1(31)), UFix(0,32)) + val shin_hi_32 = Mux(isSub(io.fn), Fill(32, io.in1(31)), UFix(0,32)) val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) val shin = Cat(shin_hi, io.in1(31,0)) - val shout_r = (Cat(sra & shin(63), shin).toFix >> shamt)(63,0) + val shout_r = (Cat(isSub(io.fn) & shin(63), shin).toFix >> shamt)(63,0) val shout_l = (shin << shamt)(63,0) val bitwise_logic = diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index f216e52a..61cec1fc 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -36,13 +36,14 @@ class rocketDpathBTB(entries: Int) extends Component val valid = Reg(resetVal = Bool(false)) val my_hit = valid && tag === io.current_pc val my_update = valid && tag === io.correct_pc - val my_clr = io.clr && my_update || io.invalidate - val my_wen = io.wen && (my_update || !update && UFix(i) === repl_way) - valid := !my_clr && (valid || my_wen) - when (my_wen) { - tag := io.correct_pc - target := io.correct_target + when (io.wen && (my_update || !update && UFix(i) === repl_way)) { + valid := Bool(false) + when (!io.clr) { + valid := Bool(true) + tag := io.correct_pc + target := io.correct_target + } } hit_reduction = hit_reduction || my_hit diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala deleted file mode 100644 index 6ce054dc..00000000 --- a/rocket/src/main/scala/dtlb.scala +++ /dev/null @@ -1,183 +0,0 @@ -package rocket - -import Chisel._; -import Node._; -import Constants._; -import scala.math._; -import hwacha._ - -// ioDTLB_CPU also located in hwacha/src/vuVXU-Interface.scala -// should keep them in sync - -class ioDTLB_CPU_req_bundle extends Bundle -{ - // lookup requests - val kill = Bool() - val cmd = Bits(width=4) // load/store/amo - val asid = Bits(width=ASID_BITS) - val vpn = Bits(width=VPN_BITS+1) -} -class ioDTLB_CPU_req extends FIFOIO()( { new ioDTLB_CPU_req_bundle() } ) - -class ioDTLB_CPU_resp extends Bundle -{ - // lookup responses - val miss = Bool(OUTPUT) - val ppn = Bits(OUTPUT, PPN_BITS) - val xcpt_ld = Bool(OUTPUT) - val xcpt_st = Bool(OUTPUT) - val xcpt_pf = Bool(OUTPUT) -} - -class ioDTLB extends Bundle -{ - // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(INPUT, 32) - // invalidate all TLB entries - val invalidate = Bool(INPUT) - val cpu_req = new ioDTLB_CPU_req().flip - val cpu_resp = new ioDTLB_CPU_resp() - val ptw = new ioTLB_PTW() -} - -class rocketDTLB(entries: Int) extends Component -{ - val io = new ioDTLB(); - - val addr_bits = ceil(log10(entries)/log10(2)).toInt; - - val s_ready :: s_request :: s_wait :: Nil = Enum(3) { UFix() }; - val state = Reg(resetVal = s_ready); - - val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_vpn = Reg() { Bits() } - val r_cpu_req_cmd = Reg() { Bits() } - val r_cpu_req_asid = Reg() { Bits() } - val r_refill_tag = Reg() { Bits() } - val r_refill_waddr = Reg() { UFix() } - - when (io.cpu_req.valid && io.cpu_req.ready) { - r_cpu_req_vpn := io.cpu_req.bits.vpn; - r_cpu_req_cmd := io.cpu_req.bits.cmd; - r_cpu_req_asid := io.cpu_req.bits.asid; - r_cpu_req_val := Bool(true); - } - .otherwise { - r_cpu_req_val := Bool(false); - } - - val req_load = (r_cpu_req_cmd === M_XRD); - val req_store = (r_cpu_req_cmd === M_XWR); - val req_amo = r_cpu_req_cmd(3).toBool; - val req_pf = (r_cpu_req_cmd === M_PFR) || (r_cpu_req_cmd === M_PFW) - - val bad_va = r_cpu_req_vpn(VPN_BITS) != r_cpu_req_vpn(VPN_BITS-1); - - val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); - val tag_ram = Mem(entries) { io.ptw.resp_ppn.clone } - when (io.ptw.resp_val) { tag_ram(r_refill_waddr) := io.ptw.resp_ppn } - - val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); - tag_cam.io.clear := io.invalidate; - tag_cam.io.clear_hit := io.cpu_resp.xcpt_ld || io.cpu_resp.xcpt_st || io.cpu_resp.xcpt_pf - tag_cam.io.tag := lookup_tag; - tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; - tag_cam.io.write_tag := r_refill_tag; - tag_cam.io.write_addr := r_refill_waddr; - val tag_hit = tag_cam.io.hit || bad_va; - val tag_hit_addr = tag_cam.io.hit_addr; - - // extract fields from status register - val status_s = io.status(SR_S).toBool; // user/supervisor mode - val status_u = !status_s; - val status_vm = io.status(SR_VM).toBool // virtual memory enable - - // extract fields from PT permission bits - val ptw_perm_ur = io.ptw.resp_perm(2); - val ptw_perm_uw = io.ptw.resp_perm(1); - val ptw_perm_sr = io.ptw.resp_perm(5); - val ptw_perm_sw = io.ptw.resp_perm(4); - - // permission bit arrays - val ur_array = Reg(resetVal = Bits(0, entries)); // user read permission - val uw_array = Reg(resetVal = Bits(0, entries)); // user write permission - val sr_array = Reg(resetVal = Bits(0, entries)); // supervisor read permission - val sw_array = Reg(resetVal = Bits(0, entries)); // supervisor write permission - when (io.ptw.resp_val) { - ur_array := ur_array.bitSet(r_refill_waddr, ptw_perm_ur); - uw_array := uw_array.bitSet(r_refill_waddr, ptw_perm_uw); - sr_array := sr_array.bitSet(r_refill_waddr, ptw_perm_sr); - sw_array := sw_array.bitSet(r_refill_waddr, ptw_perm_sw); - } - - // when the page table lookup reports an error, set all permission - // bits to 0 so the next access will cause an exception - when (io.ptw.resp_err) { - ur_array := ur_array.bitSet(r_refill_waddr, Bool(false)); - uw_array := uw_array.bitSet(r_refill_waddr, Bool(false)); - sr_array := sr_array.bitSet(r_refill_waddr, Bool(false)); - sw_array := sw_array.bitSet(r_refill_waddr, Bool(false)); - } - - // high if there are any unused (invalid) entries in the TLB - val has_invalid_entry = !tag_cam.io.valid_bits.andR - val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) - val plru = new PseudoLRU(entries) - val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace).toUFix; - - val lookup = (state === s_ready) && status_vm && r_cpu_req_val && (req_load || req_store || req_amo || req_pf); - val lookup_hit = lookup && tag_hit; - val lookup_miss = lookup && !tag_hit; - val tlb_hit = !io.cpu_req.bits.kill && lookup_hit; - val tlb_miss = !io.cpu_req.bits.kill && lookup_miss; - - // currently replace TLB entries in LIFO order - // TODO: implement LRU replacement policy - when (tlb_miss) { - r_refill_tag := lookup_tag; - r_refill_waddr := repl_waddr; - } - when (tlb_hit) { - plru.access(tag_hit_addr) - } - - val load_fault_common = tlb_hit && - ((status_s && !sr_array(tag_hit_addr)) || - (status_u && !ur_array(tag_hit_addr)) || - bad_va) - val store_fault_common = tlb_hit && - ((status_s && !sw_array(tag_hit_addr)) || - (status_u && !uw_array(tag_hit_addr)) || - bad_va) - - io.cpu_resp.xcpt_ld := load_fault_common && (req_load || req_amo) - io.cpu_resp.xcpt_st := store_fault_common && (req_store || req_amo) - io.cpu_resp.xcpt_pf := load_fault_common && req_pf - - io.cpu_req.ready := (state === s_ready) && !lookup_miss; - io.cpu_resp.miss := tlb_miss; - io.cpu_resp.ppn := - Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)); - - io.ptw.req_val := (state === s_request); - io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); - - // control state machine - switch (state) { - is (s_ready) { - when (tlb_miss) { - state := s_request; - } - } - is (s_request) { - when (io.ptw.req_rdy) { - state := s_wait; - } - } - is (s_wait) { - when (io.ptw.resp_val || io.ptw.resp_err) { - state := s_ready; - } - } - } -} diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 9b79e2ce..3967b477 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -6,172 +6,249 @@ import Constants._; import scala.math._; import uncore._ -// interface between I$ and pipeline/ITLB (32 bits wide) -class ioImem extends Bundle +case class ICacheConfig(co: CoherencePolicyWithUncached, sets: Int, assoc: Int, parity: Boolean = false) { - val invalidate = Bool(INPUT); - val itlb_miss = Bool(INPUT); - val req_val = Bool(INPUT); - val req_idx = Bits(INPUT, PGIDX_BITS); - val req_ppn = Bits(INPUT, PPN_BITS); - val resp_data = Bits(OUTPUT, 32); - val resp_val = Bool(OUTPUT); + val w = 1 + val ibytes = INST_BITS/8 + + val dm = assoc == 1 + val lines = sets * assoc + val databits = MEM_DATA_BITS + val datawidth = databits + (if (parity) 1 else 0) + val idxbits = log2Up(sets) + val offbits = OFFSET_BITS + val untagbits = idxbits + offbits + val tagbits = PADDR_BITS - untagbits + val tagwidth = tagbits + (if (parity) 1 else 0) + + require(isPow2(sets) && isPow2(assoc)) + require(isPow2(w) && isPow2(ibytes)) + require(PGIDX_BITS >= untagbits) } -class ioRocketICache extends Bundle() -{ - val cpu = new ioImem(); - val mem = new ioUncachedRequestor +class FrontendReq extends Bundle { + val pc = UFix(width = VADDR_BITS+1) + val status = Bits(width = 32) + val invalidate = Bool() + val invalidateTLB = Bool() + val mispredict = Bool() + val taken = Bool() + val currentpc = UFix(width = VADDR_BITS+1) } -// basic direct mapped instruction cache -// 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines -// parameters : -// lines = # cache lines -class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) extends Component +class FrontendResp extends Bundle { + val pc = UFix(width = VADDR_BITS+1) // ID stage PC + val data = Bits(width = INST_BITS) + val taken = Bool() + val xcpt_ma = Bool() + val xcpt_if = Bool() +} + +class IOCPUFrontend extends Bundle { + val req = new PipeIO()(new FrontendReq) + val resp = new FIFOIO()(new FrontendResp).flip + val ptw = new IOTLBPTW().flip +} + +class Frontend(c: ICacheConfig) extends Component { - val io = new ioRocketICache(); - - val lines = sets * assoc; - val addrbits = PADDR_BITS; - val indexbits = log2Up(sets); - val offsetbits = OFFSET_BITS; - val tagmsb = addrbits - 1; - val taglsb = indexbits+offsetbits; - val tagbits = addrbits-taglsb; - val indexmsb = taglsb-1; - val indexlsb = offsetbits; - val offsetmsb = indexlsb-1; - val databits = 32; - val offsetlsb = log2Up(databits/8); - val rf_cnt_bits = log2Up(REFILL_CYCLES); - - require(PGIDX_BITS >= taglsb); // virtually-indexed, physically-tagged constraint - require(isPow2(sets) && isPow2(assoc)); + val io = new Bundle { + val cpu = new IOCPUFrontend().flip + val mem = new ioUncachedRequestor + } - val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(5) { UFix() }; - val state = Reg(resetVal = s_reset); + val btb = new rocketDpathBTB(BTB_ENTRIES) + val icache = new ICache(c) + val tlb = new TLB(ITLB_ENTRIES) + + val s1_pc = Reg() { UFix() } + val s2_valid = Reg(resetVal = Bool(true)) + val s2_pc = Reg(resetVal = UFix(START_ADDR)) + val s2_btb_hit = Reg(resetVal = Bool(false)) + val s2_xcpt_if = Reg(resetVal = Bool(false)) + + val btbTarget = Cat(btb.io.target(VADDR_BITS-1), btb.io.target) + val pcp4_0 = s1_pc + UFix(c.ibytes) + val pcp4 = Cat(s1_pc(VADDR_BITS-1) & pcp4_0(VADDR_BITS-1), pcp4_0(VADDR_BITS-1,0)) + val icmiss = s2_valid && !icache.io.resp.valid + val npc = Mux(icmiss, s2_pc, Mux(btb.io.hit, btbTarget, pcp4)).toUFix + + val stall = !io.cpu.resp.ready + when (!stall) { + s1_pc := npc + s2_valid := !icmiss + s2_pc := s1_pc + s2_btb_hit := btb.io.hit + s2_xcpt_if := tlb.io.resp.xcpt_if + } + when (io.cpu.req.valid) { + s1_pc := io.cpu.req.bits.pc + s2_valid := Bool(false) + } + + btb.io.current_pc := s1_pc + btb.io.wen := io.cpu.req.bits.mispredict + btb.io.clr := !io.cpu.req.bits.taken + btb.io.correct_pc := io.cpu.req.bits.currentpc + btb.io.correct_target := io.cpu.req.bits.pc + btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.req.bits.invalidateTLB + + tlb.io.ptw <> io.cpu.ptw + tlb.io.req.valid := !stall && !icmiss + tlb.io.req.bits.vpn := s1_pc >> UFix(PGIDX_BITS) + tlb.io.req.bits.status := io.cpu.req.bits.status + tlb.io.req.bits.asid := UFix(0) + tlb.io.req.bits.invalidate := io.cpu.req.bits.invalidateTLB + tlb.io.req.bits.instruction := Bool(true) + + icache.io.mem <> io.mem + icache.io.req.valid := !stall + icache.io.req.bits.idx := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) + icache.io.req.bits.invalidate := io.cpu.req.bits.invalidate + icache.io.req.bits.ppn := tlb.io.resp.ppn + icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss + icache.io.resp.ready := io.cpu.resp.ready + + io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) + io.cpu.resp.bits.pc := s2_pc + io.cpu.resp.bits.data := icache.io.resp.bits.data + io.cpu.resp.bits.taken := s2_btb_hit + io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(c.ibytes)-1,0) != UFix(0) + io.cpu.resp.bits.xcpt_if := s2_xcpt_if +} + +class ICache(c: ICacheConfig) extends Component +{ + val io = new Bundle { + val req = new PipeIO()(new Bundle { + val idx = UFix(width = PGIDX_BITS) + val invalidate = Bool() + val ppn = UFix(width = PPN_BITS) // delayed one cycle + val kill = Bool() // delayed one cycle + }).flip + val resp = new FIFOIO()(new Bundle { + val data = Bits(width = INST_BITS) + val datablock = Bits(width = c.databits) + }) + val mem = new ioUncachedRequestor + } + + val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(4) { UFix() } + val state = Reg(resetVal = s_ready) val invalidated = Reg() { Bool() } - - val r_cpu_req_idx = Reg { Bits() } - val r_cpu_req_ppn = Reg { Bits() } - val r_cpu_req_val = Reg(resetVal = Bool(false)); - + val stall = !io.resp.ready val rdy = Bool() - val tag_hit = Bool() - - when (io.cpu.req_val && rdy) { - r_cpu_req_val := Bool(true) - r_cpu_req_idx := io.cpu.req_idx - } - .otherwise { - r_cpu_req_val := Bool(false) - } - when (state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss) { - r_cpu_req_ppn := io.cpu.req_ppn + + val s2_valid = Reg(resetVal = Bool(false)) + val s2_addr = Reg { UFix(width = PADDR_BITS) } + + val s1_valid = Reg(resetVal = Bool(false)) + val s1_pgoff = Reg() { UFix(width = PGIDX_BITS) } + + val s0_valid = io.req.valid && rdy || s1_valid && stall && !io.req.bits.kill + val s0_pgoff = Mux(io.req.valid, io.req.bits.idx, s1_pgoff) + + s1_valid := s0_valid + when (io.req.valid && rdy) { + s1_pgoff := s0_pgoff } - val r_cpu_hit_addr = Cat(io.cpu.req_ppn, r_cpu_req_idx) - val r_cpu_hit_tag = r_cpu_hit_addr(tagmsb,taglsb) - val r_cpu_miss_addr = Cat(r_cpu_req_ppn, r_cpu_req_idx) - val r_cpu_miss_tag = r_cpu_miss_addr(tagmsb,taglsb) - - // refill counter - val refill_count = Reg(resetVal = UFix(0, rf_cnt_bits)); - when (io.mem.xact_rep.valid) { - refill_count := refill_count + UFix(1); + s2_valid := s1_valid && rdy && !io.req.bits.kill || stall + when (s1_valid && rdy && !stall) { + s2_addr := Cat(io.req.bits.ppn, s1_pgoff).toUFix } - val refill_done = io.mem.xact_rep.valid && refill_count.andR - val repl_way = if (assoc == 1) UFix(0) else LFSR16(state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss && !tag_hit)(log2Up(assoc)-1,0) - val word_shift = Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2Up(databits))).toUFix - val tag_we = refill_done - val tag_addr = - Mux((state === s_refill), r_cpu_req_idx(indexmsb,indexlsb), - io.cpu.req_idx(indexmsb,indexlsb)).toUFix; - val data_addr = - Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), - io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; + val s2_tag = s2_addr(c.tagbits+c.untagbits-1,c.untagbits) + val s2_idx = s2_addr(c.untagbits-1,c.offbits) + val s2_offset = s2_addr(c.offbits-1,0) + val s2_any_tag_hit = Bool() + val s2_hit = s2_valid && s2_any_tag_hit + val s2_miss = s2_valid && !s2_any_tag_hit + rdy := state === s_ready && !s2_miss - val tag_array = Mem(sets, seqRead = true) { Bits(width = tagbits*assoc) } + val (rf_cnt, refill_done) = Counter(io.mem.xact_rep.valid, REFILL_CYCLES) + val repl_way = if (c.dm) UFix(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) + + val tag_array = Mem(c.sets, seqRead = true) { Bits(width = c.tagwidth*c.assoc) } val tag_rdata = Reg() { Bits() } - when (tag_we) { - tag_array.write(tag_addr, Fill(assoc, r_cpu_miss_tag), FillInterleaved(tagbits, if (assoc > 1) UFixToOH(repl_way) else Bits(1))) - }.otherwise { - tag_rdata := tag_array(tag_addr) + when (refill_done) { + val wmask = FillInterleaved(c.tagwidth, if (c.dm) Bits(1) else UFixToOH(repl_way)) + val tag = Cat(if (c.parity) s2_tag.xorR else null, s2_tag) + tag_array.write(s2_idx, Fill(c.assoc, tag), wmask) + }.elsewhen (s0_valid) { + tag_rdata := tag_array(s0_pgoff(c.untagbits-1,c.offbits)) } - val vb_array = Reg(resetVal = Bits(0, lines)) - when (io.cpu.invalidate) { + val vb_array = Reg(resetVal = Bits(0, c.lines)) + when (refill_done && !invalidated) { + vb_array := vb_array.bitSet(Cat(repl_way, s2_idx), Bool(true)) + } + when (io.req.bits.invalidate) { vb_array := Bits(0) - }.elsewhen (tag_we) { - vb_array := vb_array.bitSet(Cat(r_cpu_req_idx(indexmsb,indexlsb), if (assoc > 1) repl_way else null), !invalidated) + invalidated := Bool(true) } + val s2_disparity = Vec(c.assoc) { Bool() } + for (i <- 0 until c.assoc) + when (s2_valid && s2_disparity(i)) { vb_array := vb_array.bitSet(Cat(UFix(i), s2_idx), Bool(false)) } - val data_mux = (new Mux1H(assoc)){Bits(width = databits)} - var any_hit = Bool(false) - for (i <- 0 until assoc) - { - val valid = vb_array(Cat(r_cpu_req_idx(indexmsb,indexlsb), if (assoc > 1) UFix(i, log2Up(assoc)) else null)) - val hit = valid && tag_rdata(tagbits*(i+1)-1, tagbits*i) === r_cpu_hit_addr(tagmsb,taglsb) - - // data array - val data_array = Mem(sets*REFILL_CYCLES, seqRead = true){ io.mem.xact_rep.bits.data.clone } - val data_out = Reg(){ io.mem.xact_rep.bits.data.clone } - when (io.mem.xact_rep.valid && repl_way === UFix(i)) { data_array(data_addr) := io.mem.xact_rep.bits.data } - .otherwise { data_out := data_array(data_addr) } - - data_mux.io.sel(i) := hit - data_mux.io.in(i) := (data_out >> word_shift)(databits-1,0); - - any_hit = any_hit || hit + val s2_tag_hit = Vec(c.assoc) { Bool() } + val s2_data_disparity = Vec(c.assoc) { Bool() } + for (i <- 0 until c.assoc) { + val s1_vb = vb_array(Cat(UFix(i), s1_pgoff(c.untagbits-1,c.offbits))).toBool + val s2_vb = Reg() { Bool() } + val s2_tag_out = Reg() { Bits() } + when (s1_valid && rdy && !stall) { + s2_vb := s1_vb + s2_tag_out := tag_rdata(c.tagwidth*(i+1)-1, c.tagwidth*i) + } + s2_tag_hit(i) := s2_vb && s2_tag_out(c.tagbits-1,0) === s2_tag + s2_disparity(i) := Bool(c.parity) && s2_vb && (s2_tag_out.xorR || s2_data_disparity(i)) } - tag_hit := any_hit + s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_) + + val s2_dout = Vec(c.assoc) { Reg() { Bits(width = c.databits) } } + for (i <- 0 until c.assoc) { + val data_array = Mem(c.sets*REFILL_CYCLES, seqRead = true){ Bits(width = c.datawidth) } + val s1_dout = Reg(){ Bits() } + when (io.mem.xact_rep.valid && repl_way === UFix(i)) { + val d = io.mem.xact_rep.bits.data + val wdata = if (c.parity) Cat(d.xorR, d) else d + data_array(Cat(s2_idx,rf_cnt)) := wdata + }.elsewhen (s0_valid) { + s1_dout := data_array(s0_pgoff(c.untagbits-1,c.offbits-rf_cnt.getWidth)) + } + when (s1_valid && rdy && !stall) { s2_dout(i) := s1_dout } + s2_data_disparity(i) := s2_dout(i).xorR + } + val s2_dout_word = s2_dout.map(x => (x >> Cat(s2_offset(log2Up(c.databits/8)-1,log2Up(c.ibytes)), Bits(0,log2Up(c.ibytes*8))))(c.ibytes*8-1,0)) + io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) + io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) val finish_q = (new Queue(1)) { new TransactionFinish } finish_q.io.enq.valid := refill_done && io.mem.xact_rep.bits.require_ack finish_q.io.enq.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id // output signals - io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_hit; - rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); - io.cpu.resp_data := data_mux.io.out + io.resp.valid := s2_hit io.mem.xact_init.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.xact_init.bits := co.getUncachedReadTransactionInit(r_cpu_miss_addr(tagmsb,indexlsb).toUFix, UFix(0)) + io.mem.xact_init.bits := c.co.getUncachedReadTransactionInit(s2_addr >> UFix(c.offbits), UFix(0)) io.mem.xact_finish <> finish_q.io.deq // control state machine - when (io.cpu.invalidate) { - invalidated := Bool(true) - } switch (state) { - is (s_reset) { - state := s_ready; - } is (s_ready) { - when (r_cpu_req_val && !tag_hit && !io.cpu.itlb_miss) { - state := s_request; - } + when (s2_miss) { state := s_request } invalidated := Bool(false) } - is (s_request) - { - when (io.mem.xact_init.ready && finish_q.io.enq.ready) { - state := s_refill_wait; - } + is (s_request) { + when (io.mem.xact_init.ready && finish_q.io.enq.ready) { state := s_refill_wait } } is (s_refill_wait) { - when (io.mem.xact_abort.valid) { - state := s_request - } - when (io.mem.xact_rep.valid) { - state := s_refill; - } + when (io.mem.xact_abort.valid) { state := s_request } + when (io.mem.xact_rep.valid) { state := s_refill } } is (s_refill) { - when (refill_done) { - state := s_ready; - } + when (refill_done) { state := s_ready } } - } + } } diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala deleted file mode 100644 index dac04fa2..00000000 --- a/rocket/src/main/scala/itlb.scala +++ /dev/null @@ -1,229 +0,0 @@ -package rocket - -import Chisel._; -import Node._; -import Constants._; -import scala.math._; - -class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { - val clear = Bool(INPUT); - val clear_hit = Bool(INPUT) - val tag = Bits(INPUT, tag_bits); - val hit = Bool(OUTPUT); - val hit_addr = UFix(OUTPUT, addr_bits); - val valid_bits = Bits(OUTPUT, entries); - - val write = Bool(INPUT); - val write_tag = Bits(INPUT, tag_bits); - val write_addr = UFix(INPUT, addr_bits); -} - -class rocketCAM(entries: Int, tag_bits: Int) extends Component { - val addr_bits = ceil(log(entries)/log(2)).toInt; - val io = new ioCAM(entries, addr_bits, tag_bits); - val cam_tags = Vec(entries) { Reg() { Bits(width = tag_bits) } } - val mux = (new Mux1H(entries)) { Bits(width = addr_bits) } - - val vb_array = Reg(resetVal = Bits(0, entries)); - when (io.write) { - vb_array := vb_array.bitSet(io.write_addr, Bool(true)); - cam_tags(io.write_addr) := io.write_tag - } - when (io.clear) { - vb_array := Bits(0, entries); - } - .elsewhen (io.clear_hit) { - vb_array := vb_array & ~mux.io.sel.toBits - } - - var l_hit = Bool(false) - for (i <- 0 to entries-1) { - val my_hit = vb_array(UFix(i)).toBool && (cam_tags(i) === io.tag) - l_hit = l_hit || my_hit - mux.io.in(i) := Bits(i) - mux.io.sel(i) := my_hit - } - - io.valid_bits := vb_array; - io.hit := l_hit; - io.hit_addr := mux.io.out.toUFix; -} - -class PseudoLRU(n: Int) -{ - val state = Reg() { Bits(width = n) } - def access(way: UFix) = { - var next_state = state - var idx = UFix(1,1) - for (i <- log2Up(n)-1 to 0 by -1) { - val bit = way(i) - val mask = (UFix(1,n) << idx)(n-1,0) - next_state = next_state & ~mask | Mux(bit, UFix(0), mask) - //next_state.bitSet(idx, !bit) - idx = Cat(idx, bit) - } - state := next_state - } - def replace = { - var idx = UFix(1,1) - for (i <- 0 until log2Up(n)) - idx = Cat(idx, state(idx)) - idx(log2Up(n)-1,0) - } -} - -// interface between TLB and PTW -class ioTLB_PTW extends Bundle -{ - // requests - val req_val = Bool(OUTPUT); - val req_rdy = Bool(INPUT); - val req_vpn = Bits(OUTPUT, VPN_BITS); - // responses - val resp_val = Bool(INPUT); - val resp_err = Bool(INPUT); - val resp_ppn = Bits(INPUT, PPN_BITS); - val resp_perm = Bits(INPUT, PERM_BITS); -} - -// interface between ITLB and fetch stage of pipeline -class ioITLB_CPU extends Bundle -{ - // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(INPUT, 32); - // invalidate all TLB entries - val invalidate = Bool(INPUT); - // lookup requests - val req_val = Bool(INPUT); - val req_rdy = Bool(OUTPUT); - val req_asid = Bits(INPUT, ASID_BITS); - val req_vpn = UFix(INPUT, VPN_BITS+1); - // lookup responses - val resp_miss = Bool(OUTPUT); -// val resp_val = Bool(OUTPUT); - val resp_ppn = UFix(OUTPUT, PPN_BITS); - val exception = Bool(OUTPUT); -} - -class ioITLB extends Bundle -{ - val cpu = new ioITLB_CPU(); - val ptw = new ioTLB_PTW(); -} - -class rocketITLB(entries: Int) extends Component -{ - val addr_bits = ceil(log10(entries)/log10(2)).toInt; - val io = new ioITLB(); - - val s_ready :: s_request :: s_wait :: Nil = Enum(3) { UFix() }; - val state = Reg(resetVal = s_ready); - - val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_vpn = Reg() { Bits() }; - val r_cpu_req_asid = Reg() { Bits() }; - val r_refill_tag = Reg() { Bits() }; - val r_refill_waddr = Reg() { UFix() }; - - when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_vpn := io.cpu.req_vpn; - r_cpu_req_asid := io.cpu.req_asid; - r_cpu_req_val := Bool(true); - } - .otherwise { - r_cpu_req_val := Bool(false); - } - - val bad_va = r_cpu_req_vpn(VPN_BITS) != r_cpu_req_vpn(VPN_BITS-1); - - val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); - val tag_ram = Mem(entries) { io.ptw.resp_ppn.clone } - when (io.ptw.resp_val) { tag_ram(r_refill_waddr) := io.ptw.resp_ppn } - - val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); - tag_cam.io.clear := io.cpu.invalidate; - tag_cam.io.clear_hit := io.cpu.exception - tag_cam.io.tag := lookup_tag; - tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; - tag_cam.io.write_tag := r_refill_tag; - tag_cam.io.write_addr := r_refill_waddr; - val tag_hit = tag_cam.io.hit || bad_va; - val tag_hit_addr = tag_cam.io.hit_addr; - - // extract fields from status register - val status_s = io.cpu.status(SR_S).toBool; // user/supervisor mode - val status_u = !status_s; - val status_vm = io.cpu.status(SR_VM).toBool // virtual memory enable - - // extract fields from PT permission bits - val ptw_perm_ux = io.ptw.resp_perm(0); - val ptw_perm_sx = io.ptw.resp_perm(3); - - // permission bit arrays - val ux_array = Reg(resetVal = Bits(0, entries)); // user execute permission - val sx_array = Reg(resetVal = Bits(0, entries)); // supervisor execute permission - when (io.ptw.resp_val) { - ux_array := ux_array.bitSet(r_refill_waddr, ptw_perm_ux); - sx_array := sx_array.bitSet(r_refill_waddr, ptw_perm_sx); - } - - // when the page table lookup reports an error, set both execute permission - // bits to 0 so the next access will cause an exceptions - when (io.ptw.resp_err) { - ux_array := ux_array.bitSet(r_refill_waddr, Bool(false)); - sx_array := sx_array.bitSet(r_refill_waddr, Bool(false)); - } - - // high if there are any unused entries in the ITLB - val has_invalid_entry = !tag_cam.io.valid_bits.andR - val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) - val plru = new PseudoLRU(entries) - val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace).toUFix; - - val lookup = (state === s_ready) && r_cpu_req_val; - val lookup_hit = lookup && tag_hit; - val lookup_miss = lookup && !tag_hit; - val tlb_hit = status_vm && lookup_hit; - val tlb_miss = status_vm && lookup_miss; - - when (tlb_miss) { - r_refill_tag := lookup_tag; - r_refill_waddr := repl_waddr; - } - when (tlb_hit) { - plru.access(tag_hit_addr) - } - - val access_fault = - tlb_hit && - ((status_s && !sx_array(tag_hit_addr).toBool) || - (status_u && !ux_array(tag_hit_addr).toBool) || - bad_va); - - io.cpu.exception := access_fault; - io.cpu.req_rdy := Mux(status_vm, (state === s_ready) && (!r_cpu_req_val || tag_hit), Bool(true)); - io.cpu.resp_miss := tlb_miss || (state != s_ready); - io.cpu.resp_ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; - - io.ptw.req_val := (state === s_request); - io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); - - // control state machine - switch (state) { - is (s_ready) { - when (tlb_miss) { - state := s_request; - } - } - is (s_request) { - when (io.ptw.req_rdy) { - state := s_wait; - } - } - is (s_wait) { - when (io.ptw.resp_val || io.ptw.resp_err) { - state := s_ready; - } - } - } -} diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index c52ef782..989db4cb 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -100,7 +100,7 @@ class rocketMultiplier(unroll: Int = 1, earlyOut: Boolean = false) extends Compo r_prod:= rhs_in r_lsb := Bool(false) } - .elsewhen (io.resp_val && io.resp_rdy || io.req_kill && r_cnt === UFix(0)) { // can only kill on first cycle + .elsewhen (io.resp_val && io.resp_rdy || io.req_kill) { r_val := Bool(false) } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index d0c540c5..3e2eaf63 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -73,7 +73,7 @@ class rocketHellaCacheArbiter(n: Int) extends Component class ioPTW(n: Int) extends Bundle { - val requestor = Vec(n) { new ioTLB_PTW }.flip + val requestor = Vec(n) { new IOTLBPTW }.flip val mem = new ioHellaCache val ptbr = UFix(INPUT, PADDR_BITS) } @@ -99,20 +99,15 @@ class rocketPTW(n: Int) extends Component val vpn_idxs = (1 until levels).map(i => r_req_vpn((levels-i)*bitsPerLevel-1, (levels-i-1)*bitsPerLevel)) val vpn_idx = (2 until levels).foldRight(vpn_idxs(0))((i,j) => Mux(count === UFix(i-1), vpn_idxs(i-1), j)) - - val req_rdy = state === s_ready - var req_val = Bool(false) - for (r <- io.requestor) { - r.req_rdy := req_rdy && !req_val - req_val = req_val || r.req_val - } - val req_dest = PriorityEncoder(io.requestor.map(_.req_val)) - val req_vpn = io.requestor.slice(0, n-1).foldRight(io.requestor(n-1).req_vpn)((r, v) => Mux(r.req_val, r.req_vpn, v)) - when (state === s_ready && req_val) { - r_req_vpn := req_vpn - r_req_dest := req_dest - req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) + val arb = new Arbiter(n)(UFix(width = VPN_BITS)) + arb.io.in <> io.requestor.map(_.req) + arb.io.out.ready := state === s_ready + + when (arb.io.out.fire()) { + r_req_vpn := arb.io.out.bits + r_req_dest := arb.io.chosen + req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) } val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false)) @@ -129,8 +124,8 @@ class rocketPTW(n: Int) extends Component io.mem.req.bits.ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) io.mem.req.bits.kill := Bool(false) - val resp_val = state === s_done - val resp_err = state === s_error + val resp_val = state === s_done || state === s_error + val resp_err = state === s_error || state === s_wait val resp_ptd = io.mem.resp.bits.data_subword(1,0) === Bits(1) val resp_pte = io.mem.resp.bits.data_subword(1,0) === Bits(2) @@ -140,16 +135,16 @@ class rocketPTW(n: Int) extends Component for (i <- 0 until io.requestor.size) { val me = r_req_dest === UFix(i) - io.requestor(i).resp_val := resp_val && me - io.requestor(i).resp_err := resp_err && me - io.requestor(i).resp_perm := r_resp_perm - io.requestor(i).resp_ppn := resp_ppn + io.requestor(i).resp.valid := resp_val && me + io.requestor(i).resp.bits.error := resp_err + io.requestor(i).resp.bits.perm := r_resp_perm + io.requestor(i).resp.bits.ppn := resp_ppn.toUFix } // control state machine switch (state) { is (s_ready) { - when (req_val) { + when (arb.io.out.valid) { state := s_req; } count := UFix(0) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index f414ff6e..56505d7b 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -3,25 +3,27 @@ package rocket import Chisel._ import Node._; -class SkidBuffer[T <: Data](resetSignal: Bool = null)(data: => T) extends Component(resetSignal) +class SkidBuffer[T <: Data](entries: Int, lateEnq: Boolean = false)(data: => T) extends Component { val io = new Bundle { val enq = new FIFOIO()(data).flip val deq = new FIFOIO()(data) } + require(entries >= 2) val fq = new Queue(1, flow = true)(data) - val pq = new Queue(1, pipe = true)(data) + val pq = new Queue(entries-1, pipe = true)(data) + val (iq, oq) = if (lateEnq) (pq, fq) else (fq, pq) - fq.io.enq <> io.enq - pq.io.enq <> fq.io.deq - io.deq <> pq.io.deq + iq.io.enq <> io.enq + oq.io.enq <> iq.io.deq + io.deq <> oq.io.deq } object SkidBuffer { - def apply[T <: Data](enq: FIFOIO[T]): FIFOIO[T] = { - val s = new SkidBuffer()(enq.bits.clone) + def apply[T <: Data](enq: FIFOIO[T], entries: Int = 2): FIFOIO[T] = { + val s = new SkidBuffer(entries)(enq.bits.clone) s.io.enq <> enq s.io.deq } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 5e621808..c6a84583 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -13,7 +13,7 @@ class Tile(co: CoherencePolicyWithUncached, resetSignal: Bool = null) extends Co } val cpu = new rocketProc - val icache = new rocketICache(128, 4, co) // 128 sets x 4 ways (32KB) + val icache = new Frontend(ICacheConfig(co, 128, 4)) // 128 sets x 4 ways (32KB) val dcache = new HellaCache(co) val arbiter = new rocketMemArbiter(2 + (if (HAVE_VEC) 1 else 0)) @@ -31,7 +31,7 @@ class Tile(co: CoherencePolicyWithUncached, resetSignal: Bool = null) extends Co if (HAVE_VEC) { - val vicache = new rocketICache(128, 1, co) // 128 sets x 1 ways (8KB) + val vicache = new Frontend(ICacheConfig(co, 128, 1)) // 128 sets x 1 ways (8KB) arbiter.io.requestor(2) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala new file mode 100644 index 00000000..6ce671a4 --- /dev/null +++ b/rocket/src/main/scala/tlb.scala @@ -0,0 +1,255 @@ +package rocket + +import Chisel._; +import Node._; +import Constants._; +import scala.math._; +import hwacha._ + +class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { + val clear = Bool(INPUT); + val clear_hit = Bool(INPUT) + val tag = Bits(INPUT, tag_bits); + val hit = Bool(OUTPUT); + val hits = UFix(OUTPUT, entries); + val valid_bits = Bits(OUTPUT, entries); + + val write = Bool(INPUT); + val write_tag = Bits(INPUT, tag_bits); + val write_addr = UFix(INPUT, addr_bits); +} + +class rocketCAM(entries: Int, tag_bits: Int) extends Component { + val addr_bits = ceil(log(entries)/log(2)).toInt; + val io = new ioCAM(entries, addr_bits, tag_bits); + val cam_tags = Vec(entries) { Reg() { Bits(width = tag_bits) } } + + val vb_array = Reg(resetVal = Bits(0, entries)); + when (io.write) { + vb_array := vb_array.bitSet(io.write_addr, Bool(true)); + cam_tags(io.write_addr) := io.write_tag + } + when (io.clear) { + vb_array := Bits(0, entries); + } + .elsewhen (io.clear_hit) { + vb_array := vb_array & ~io.hits + } + + val hits = (0 until entries).map(i => vb_array(i) && cam_tags(i) === io.tag) + + io.valid_bits := vb_array; + io.hits := Vec(hits){Bool()}.toBits.toUFix + io.hit := io.hits.orR +} + +class PseudoLRU(n: Int) +{ + val state = Reg() { Bits(width = n) } + def access(way: UFix) = { + var next_state = state + var idx = UFix(1,1) + for (i <- log2Up(n)-1 to 0 by -1) { + val bit = way(i) + val mask = (UFix(1,n) << idx)(n-1,0) + next_state = next_state & ~mask | Mux(bit, UFix(0), mask) + //next_state.bitSet(idx, !bit) + idx = Cat(idx, bit) + } + state := next_state + } + def replace = { + var idx = UFix(1,1) + for (i <- 0 until log2Up(n)) + idx = Cat(idx, state(idx)) + idx(log2Up(n)-1,0) + } +} + +class IOTLBPTW extends Bundle { + val req = new FIFOIO()(UFix(width = VPN_BITS)) + val resp = new PipeIO()(new Bundle { + val error = Bool() + val ppn = UFix(width = PPN_BITS) + val perm = Bits(width = PERM_BITS) + }).flip +} + +class TLBReq extends Bundle +{ + val asid = UFix(width = ASID_BITS) + val vpn = UFix(width = VPN_BITS+1) + val status = Bits(width = 32) + val invalidate = Bool() + val instruction = Bool() +} + +class TLBResp(entries: Int) extends Bundle +{ + // lookup responses + val miss = Bool(OUTPUT) + val hit_idx = UFix(OUTPUT, entries) + val ppn = UFix(OUTPUT, PPN_BITS) + val xcpt_ld = Bool(OUTPUT) + val xcpt_st = Bool(OUTPUT) + val xcpt_pf = Bool(OUTPUT) + val xcpt_if = Bool(OUTPUT) + + override def clone = new TLBResp(entries).asInstanceOf[this.type] +} + +class TLB(entries: Int) extends Component +{ + val io = new Bundle { + val req = new FIFOIO()(new TLBReq).flip + val resp = new TLBResp(entries) + val ptw = new IOTLBPTW + } + + val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4) { UFix() } + val state = Reg(resetVal = s_ready) + val r_refill_tag = Reg() { UFix() } + val r_refill_waddr = Reg() { UFix() } + + val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); + val tag_ram = Vec(entries) { Reg() { io.ptw.resp.bits.ppn.clone } } + when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn } + + val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUFix + tag_cam.io.clear := io.req.bits.invalidate + tag_cam.io.clear_hit := io.req.fire() && Mux(io.req.bits.instruction, io.resp.xcpt_if, io.resp.xcpt_ld && io.resp.xcpt_st) + tag_cam.io.tag := lookup_tag + tag_cam.io.write := state === s_wait && io.ptw.resp.valid + tag_cam.io.write_tag := r_refill_tag + tag_cam.io.write_addr := r_refill_waddr + val tag_hit = tag_cam.io.hit + val tag_hit_addr = OHToUFix(tag_cam.io.hits) + + // permission bit arrays + val ur_array = Reg(resetVal = Bits(0, entries)) // user read permission + val uw_array = Reg(resetVal = Bits(0, entries)) // user write permission + val ux_array = Reg(resetVal = Bits(0, entries)) // user execute permission + val sr_array = Reg(resetVal = Bits(0, entries)) // supervisor read permission + val sw_array = Reg(resetVal = Bits(0, entries)) // supervisor write permission + val sx_array = Reg(resetVal = Bits(0, entries)) // supervisor execute permission + when (tag_cam.io.write) { + val perm = (!io.ptw.resp.bits.error).toFix & io.ptw.resp.bits.perm(5,0) + ur_array := ur_array.bitSet(r_refill_waddr, perm(2)) + uw_array := uw_array.bitSet(r_refill_waddr, perm(1)) + ux_array := ux_array.bitSet(r_refill_waddr, perm(0)) + sr_array := sr_array.bitSet(r_refill_waddr, perm(5)) + sw_array := sw_array.bitSet(r_refill_waddr, perm(4)) + sx_array := sx_array.bitSet(r_refill_waddr, perm(3)) + } + + // high if there are any unused (invalid) entries in the TLB + val has_invalid_entry = !tag_cam.io.valid_bits.andR + val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) + val plru = new PseudoLRU(entries) + val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) + + val status_s = io.req.bits.status(SR_S) // user/supervisor mode + val status_vm = io.req.bits.status(SR_VM) // virtual memory enable + val bad_va = io.req.bits.vpn(VPN_BITS) != io.req.bits.vpn(VPN_BITS-1) + val tlb_hit = status_vm && tag_hit + val tlb_miss = status_vm && !tag_hit && !bad_va + + when (io.req.valid && tlb_hit) { + plru.access(tag_hit_addr) + } + + io.req.ready := state === s_ready + io.resp.xcpt_ld := bad_va || tlb_hit && !Mux(status_s, sr_array(tag_hit_addr), ur_array(tag_hit_addr)) + io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, sw_array(tag_hit_addr), uw_array(tag_hit_addr)) + io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, sx_array(tag_hit_addr), ux_array(tag_hit_addr)) + io.resp.miss := tlb_miss + io.resp.ppn := Mux(status_vm, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) + io.resp.hit_idx := tag_cam.io.hits + + io.ptw.req.valid := state === s_request + io.ptw.req.bits := r_refill_tag + + when (io.req.fire() && tlb_miss) { + state := s_request + r_refill_tag := lookup_tag + r_refill_waddr := repl_waddr + } + when (state === s_request) { + when (io.req.bits.invalidate) { + state := s_ready + } + when (io.ptw.req.ready) { + state := s_wait + when (io.req.bits.invalidate) { state := s_wait_invalidate } + } + } + when (state === s_wait && io.req.bits.invalidate) { + state := s_wait_invalidate + } + when ((state === s_wait || state === s_wait_invalidate) && io.ptw.resp.valid) { + state := s_ready + } +} + +// ioDTLB_CPU also located in hwacha/src/vuVXU-Interface.scala +// should keep them in sync + +class ioDTLB_CPU_req_bundle extends TLBReq +{ + val kill = Bool() + val cmd = Bits(width=4) // load/store/amo +} +class ioDTLB_CPU_req extends FIFOIO()( { new ioDTLB_CPU_req_bundle() } ) +class ioDTLB_CPU_resp extends TLBResp(1) + +class ioDTLB extends Bundle +{ + // status bits (from PCR), to check current permission and whether VM is enabled + val status = Bits(INPUT, 32) + // invalidate all TLB entries + val invalidate = Bool(INPUT) + val cpu_req = new ioDTLB_CPU_req().flip + val cpu_resp = new ioDTLB_CPU_resp() + val ptw = new IOTLBPTW +} + +class rocketTLB(entries: Int) extends Component +{ + val io = new ioDTLB(); + + val r_cpu_req_val = Reg(resetVal = Bool(false)); + val r_cpu_req_vpn = Reg() { UFix() } + val r_cpu_req_cmd = Reg() { Bits() } + val r_cpu_req_asid = Reg() { UFix() } + + val tlb = new TLB(entries) + tlb.io.req.valid := r_cpu_req_val && !io.cpu_req.bits.kill + tlb.io.req.bits.instruction := Bool(false) + tlb.io.req.bits.invalidate := io.invalidate + tlb.io.req.bits.status := io.status + tlb.io.req.bits.vpn := r_cpu_req_vpn + tlb.io.req.bits.asid := r_cpu_req_asid + + def cmdIsRead(cmd: Bits) = cmd === M_XRD || cmd(3) + def cmdIsWrite(cmd: Bits) = cmd === M_XWR || cmd(3) + def cmdIsPrefetch(cmd: Bits) = cmd === M_PFR || cmd === M_PFW + def cmdNeedsTLB(cmd: Bits) = cmdIsRead(cmd) || cmdIsWrite(cmd) || cmdIsPrefetch(cmd) + + when (io.cpu_req.fire() && cmdNeedsTLB(io.cpu_req.bits.cmd)) { + r_cpu_req_vpn := io.cpu_req.bits.vpn; + r_cpu_req_cmd := io.cpu_req.bits.cmd; + r_cpu_req_asid := io.cpu_req.bits.asid; + r_cpu_req_val := Bool(true); + } + .otherwise { + r_cpu_req_val := Bool(false); + } + + io.cpu_req.ready := tlb.io.req.ready && !io.cpu_resp.miss + io.cpu_resp.ppn := tlb.io.resp.ppn + io.cpu_resp.miss := r_cpu_req_val && tlb.io.resp.miss + io.cpu_resp.xcpt_ld := r_cpu_req_val && tlb.io.resp.xcpt_ld && cmdIsRead(r_cpu_req_cmd) + io.cpu_resp.xcpt_st := r_cpu_req_val && tlb.io.resp.xcpt_st && cmdIsWrite(r_cpu_req_cmd) + io.cpu_resp.xcpt_pf := r_cpu_req_val && tlb.io.resp.xcpt_ld && cmdIsPrefetch(r_cpu_req_cmd) + io.ptw <> tlb.io.ptw +}