From 8dce89703a520bf0b95da9d1f638ab0da48f588c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 16 Nov 2012 02:39:33 -0800 Subject: [PATCH] new D$ with better QoR and AMO pipelining Vector unit is disabled because nack handling needs to be fixed. --- rocket/src/main/scala/arbiter.scala | 11 +- rocket/src/main/scala/consts.scala | 34 -- rocket/src/main/scala/core.scala | 6 +- rocket/src/main/scala/ctrl.scala | 613 ++++++++++---------- rocket/src/main/scala/ctrl_util.scala | 32 - rocket/src/main/scala/dpath.scala | 158 +++-- rocket/src/main/scala/fpu.scala | 15 +- rocket/src/main/scala/nbdcache.scala | 805 +++++++++++--------------- rocket/src/main/scala/package.scala | 2 +- rocket/src/main/scala/ptw.scala | 15 +- rocket/src/main/scala/tile.scala | 3 +- 11 files changed, 738 insertions(+), 956 deletions(-) delete mode 100644 rocket/src/main/scala/ctrl_util.scala diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 8fb5d3f7..119362ee 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -43,8 +43,7 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp io.requestor(i).xcpt := io.mem.xcpt resp.bits := io.mem.resp.bits resp.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) - resp.bits.miss := io.mem.resp.bits.miss && tag_hit - resp.bits.nack := io.mem.resp.bits.nack && r_valid(i) + resp.bits.nack := io.mem.resp.bits.nack && tag_hit resp.bits.replay := io.mem.resp.bits.replay && tag_hit } } @@ -90,10 +89,15 @@ class MemArbiter(n: Int) extends Component { for (i <- 1 until n) io.requestor(i).xact_finish.ready := io.requestor(i-1).xact_finish.ready && !io.requestor(i-1).xact_finish.valid + io.mem.xact_rep.ready := Bool(false) for (i <- 0 until n) { val tag = io.mem.xact_rep.bits.tile_xact_id - io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid && tag(log2Up(n)-1,0) === UFix(i) + io.requestor(i).xact_rep.valid := Bool(false) + when (tag(log2Up(n)-1,0) === UFix(i)) { + io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid + io.mem.xact_rep.ready := io.requestor(i).xact_rep.ready + } io.requestor(i).xact_rep.bits := io.mem.xact_rep.bits io.requestor(i).xact_rep.bits.tile_xact_id := tag >> UFix(log2Up(n)) } @@ -107,5 +111,4 @@ class MemArbiter(n: Int) extends Component { } io.mem.xact_abort.ready := Bool(true) - io.mem.xact_rep.ready := Bool(true) } diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index b482673d..3490ce4b 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -72,35 +72,6 @@ trait ScalarOpConstants { val RA = UFix(1, 5); } -trait MemoryOpConstants { - val MT_X = Bits("b???", 3); - val MT_B = Bits("b000", 3); - val MT_H = Bits("b001", 3); - val MT_W = Bits("b010", 3); - val MT_D = Bits("b011", 3); - val MT_BU = Bits("b100", 3); - val MT_HU = Bits("b101", 3); - val MT_WU = Bits("b110", 3); - - val M_X = Bits("b????", 4); - val M_XRD = Bits("b0000", 4); // int load - val M_XWR = Bits("b0001", 4); // int store - val M_PFR = Bits("b0010", 4); // prefetch with intent to read - val M_PFW = Bits("b0011", 4); // prefetch with intent to write - val M_FLA = Bits("b0100", 4); // write back and invlaidate all lines - val M_FENCE = Bits("b0101", 4); // memory fence - val M_INV = Bits("b0110", 4); // write back and invalidate line - val M_CLN = Bits("b0111", 4); // write back line - val M_XA_ADD = Bits("b1000", 4); - val M_XA_SWAP = Bits("b1001", 4); - val M_XA_AND = Bits("b1010", 4); - val M_XA_OR = Bits("b1011", 4); - val M_XA_MIN = Bits("b1100", 4); - val M_XA_MAX = Bits("b1101", 4); - val M_XA_MINU = Bits("b1110", 4); - val M_XA_MAXU = Bits("b1111", 4); -} - trait PCRConstants { val PCR_X = Bits("b???", 3) val PCR_N = Bits(0,3) @@ -109,11 +80,6 @@ trait PCRConstants { val PCR_C = Bits(6,3) // clearpcr val PCR_S = Bits(7,3) // setpcr - val SYNC_X = Bits("b??", 2) - val SYNC_N = Bits(0,2); - val SYNC_D = Bits(1,2); - val SYNC_I = Bits(2,2); - val PCR_STATUS = UFix( 0, 5); val PCR_EPC = UFix( 1, 5); val PCR_BADVADDR = UFix( 2, 5); diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 819aa73b..60973cea 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -114,11 +114,11 @@ class Core(implicit conf: RocketConfiguration) extends Component dmem(2).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) vu.io.dmem_req.ready := dmem(2).req.ready - vu.io.dmem_resp.valid := Reg(dmem(2).resp.valid) + vu.io.dmem_resp.valid := dmem(2).resp.valid vu.io.dmem_resp.bits.nack := dmem(2).resp.bits.nack vu.io.dmem_resp.bits.data := dmem(2).resp.bits.data_subword - vu.io.dmem_resp.bits.tag := Reg(dmem(2).resp.bits.tag) - vu.io.dmem_resp.bits.typ := Reg(dmem(2).resp.bits.typ) + vu.io.dmem_resp.bits.tag := dmem(2).resp.bits.tag + vu.io.dmem_resp.bits.typ := dmem(2).resp.bits.typ // share vector integer multiplier with rocket dpath.io.vec_imul_req <> vu.io.cp_imul_req diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ac609557..86835d4a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -26,8 +26,9 @@ class ioCtrlDpath extends Bundle() val sel_wa = Bool(OUTPUT); val sel_wb = UFix(OUTPUT, 3); val pcr = UFix(OUTPUT, 3) - val wb_eret = Bool(OUTPUT); + val eret = Bool(OUTPUT); val mem_load = Bool(OUTPUT); + val wb_load = Bool(OUTPUT) val ex_fp_val= Bool(OUTPUT); val mem_fp_val= Bool(OUTPUT); val ex_wen = Bool(OUTPUT); @@ -44,23 +45,20 @@ class ioCtrlDpath extends Bundle() // inputs from datapath val inst = Bits(INPUT, 32); val jalr_eq = Bool(INPUT) - val br_eq = Bool(INPUT); - val br_lt = Bool(INPUT); - val br_ltu = Bool(INPUT); + val ex_br_type = Bits(OUTPUT, 3) + val ex_br_taken = Bool(INPUT) val div_rdy = Bool(INPUT); val div_result_val = Bool(INPUT); val mul_rdy = Bool(INPUT); val mul_result_val = Bool(INPUT); - val mem_wb = Bool(INPUT); + val mem_ll_wb = Bool(INPUT) + val mem_ll_waddr = UFix(INPUT, 5) val ex_waddr = UFix(INPUT, 5); // write addr from execute stage val mem_waddr = UFix(INPUT, 5); // write addr from memory stage val wb_waddr = UFix(INPUT, 5); // write addr from writeback stage val status = Bits(INPUT, 32); - val sboard_clr = Bool(INPUT); - val sboard_clra = UFix(INPUT, 5); val fp_sboard_clr = Bool(INPUT); val fp_sboard_clra = UFix(INPUT, 5); - val fp_sboard_wb_waddr = UFix(INPUT, 5); val irq_timer = Bool(INPUT); val irq_ipi = Bool(INPUT); val pcr_replay = Bool(INPUT) @@ -71,12 +69,13 @@ abstract trait DecodeConstants val xpr64 = Y; val decode_default = - // jalr eret - // fp_val | renx2 div_val | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,BR_X, X,X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,MUL_X, X,X,WA_X, WB_X, PCR_X,SYNC_X,X,X,X,X) + // fence.i + // jalr | eret + // fp_val | renx2 div_val | | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,BR_X, X,X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,MUL_X, X,X,WA_X, WB_X, PCR_X,N,X,X,X,X) val table: Array[(Bits, List[Bits])] } @@ -84,237 +83,239 @@ abstract trait DecodeConstants object XDecode extends DecodeConstants { val table = Array( - // jalr eret - // fp_val | renx2 div_val | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + // fence.i + // jalr | eret + // fp_val | renx2 div_val | | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RA,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - RDNPC-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RA,WB_PC, PCR_N,N,N,N,N,N), + JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), + JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), + JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), + RDNPC-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), - LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LW-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LBU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LHU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SB-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SH-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SW-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LW-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LBU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LHU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SB-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + SH-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + SW-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - XORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvAND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvXOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + XORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + riscvAND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + riscvOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + riscvXOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,N), - SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), - CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_C,SYNC_N,N,N,Y,Y), - ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,Y,N), - FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), - FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,Y), - CFLUSH-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,Y), - MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_F,SYNC_N,N,N,Y,Y), - MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_T,SYNC_N,N,N,Y,Y), - RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), - RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), - RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N)) + SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,Y,N,N), + SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_S,N,N,N,Y,Y), + CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_C,N,N,N,Y,Y), + ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,Y,N,Y,N), + FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,Y,N,N,N,Y), + MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_F,N,N,N,Y,Y), + MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_T,N,N,N,Y,Y), + RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,N,N,N,N,N), + RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,N,N,N,N,N), + RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_IRT,PCR_N,N,N,N,N,N)) } object FDecode extends DecodeConstants { val table = Array( - // jalr eret - // fp_val | renx2 div_val | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMIN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMIN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMAX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMAX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMUL_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMUL_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFTX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFTX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_W_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_W_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_WU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_WU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_L_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_L_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_LU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_LU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FEQ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FEQ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLT_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLT_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLE_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLE_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MXTF_S-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MXTF_D-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFFSR-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MTFSR-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FLD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FSW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FSD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N)) + // fence.i + // jalr | eret + // fp_val | renx2 div_val | | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMIN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMIN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMAX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMAX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMUL_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMUL_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MFTX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MFTX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_W_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_W_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_WU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_WU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_L_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_L_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_LU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_LU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FEQ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FEQ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLT_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLT_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLE_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLE_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MXTF_S-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MXTF_D-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_S_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_S_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_S_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_S_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MFFSR-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MTFSR-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + FLD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + FSW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + FSD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N)) } object VDecode extends DecodeConstants { val table = Array( - // jalr eret - // fp_val | renx2 div_val | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - VVCFGIVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VVCFG-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VSETVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VF-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - VMSV-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FENCE_V_L-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FENCE_V_G-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), - VLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLWU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLHU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLBU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTWU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTHU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTBU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + // fence.i + // jalr | eret + // fp_val | renx2 div_val | | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + VVCFGIVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), + VVCFG-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), + VSETVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), + VF-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + VMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + VMSV-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FENCE_V_L-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + FENCE_V_G-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + VLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLWU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLHU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLBU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTWU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTHU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTBU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VENQCMD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQIMM1-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQIMM2-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQCNT-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VXCPTEVAC-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VXCPTKILL-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N), - VXCPTHOLD-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N)) + VENQCMD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VENQIMM1-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VENQIMM2-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VENQCNT-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VXCPTEVAC-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VXCPTKILL-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,Y,N), + VXCPTHOLD-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,Y,N)) } class Control(implicit conf: RocketConfiguration) extends Component @@ -342,7 +343,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val id_int_val :: id_fp_val :: id_vec_val :: id_br_type :: id_jalr :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 - val id_pcr :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 + val id_pcr :: id_fence_i :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 val id_raddr3 = io.dpath.inst(16,12); val id_raddr2 = io.dpath.inst(21,17); @@ -395,13 +396,13 @@ class Control(implicit conf: RocketConfiguration) extends Component val wb_reg_wen = Reg(resetVal = Bool(false)) val wb_reg_fp_wen = Reg(resetVal = Bool(false)) val wb_reg_flush_inst = Reg(resetVal = Bool(false)) + val wb_reg_mem_val = Reg(resetVal = Bool(false)) val wb_reg_eret = Reg(resetVal = Bool(false)) val wb_reg_xcpt = Reg(resetVal = Bool(false)) val wb_reg_replay = Reg(resetVal = Bool(false)) val wb_reg_replay_next = Reg(resetVal = Bool(false)) val wb_reg_cause = Reg(){UFix()} val wb_reg_fp_val = Reg(resetVal = Bool(false)) - val wb_reg_dcache_miss = Reg(io.dmem.resp.bits.miss || io.dmem.resp.bits.nack, resetVal = Bool(false)) val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) val take_pc = Bool() @@ -509,7 +510,7 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_wen := id_wen ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen ex_reg_eret := id_eret.toBool; - ex_reg_flush_inst := (id_sync === SYNC_I); + ex_reg_flush_inst := id_fence_i ex_reg_fp_val := id_fp_val ex_reg_vec_val := id_vec_val.toBool ex_reg_replay_next := id_replay_next @@ -519,20 +520,22 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_xcpt := id_xcpt } - val br_taken = - Mux(ex_reg_br_type === BR_EQ, io.dpath.br_eq, - Mux(ex_reg_br_type === BR_NE, ~io.dpath.br_eq, - Mux(ex_reg_br_type === BR_LT, io.dpath.br_lt, - Mux(ex_reg_br_type === BR_GE, ~io.dpath.br_lt, - Mux(ex_reg_br_type === BR_LTU, io.dpath.br_ltu, - Mux(ex_reg_br_type === BR_GEU, ~io.dpath.br_ltu, - ex_reg_br_type === BR_J)))))) - val take_pc_ex = !Mux(ex_reg_jalr, ex_reg_btb_hit && io.dpath.jalr_eq, ex_reg_btb_hit === br_taken) + // replay inst in ex stage + val wb_dcache_miss = wb_reg_mem_val && (wb_reg_wen || wb_reg_fp_wen) && !io.dmem.resp.valid + val replay_ex = wb_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || + ex_reg_mem_val && !io.dmem.req.ready || + ex_reg_div_val && !io.dpath.div_rdy || + ex_reg_mul_val && !io.dpath.mul_rdy || + mem_reg_replay_next + ctrl_killx := take_pc_wb || replay_ex + + val take_pc_ex = !Mux(ex_reg_jalr, ex_reg_btb_hit && io.dpath.jalr_eq, ex_reg_btb_hit === io.dpath.ex_br_taken) val (ex_xcpt, ex_cause) = checkExceptions(List( (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), (ex_reg_fp_val && io.fpu.illegal_rm, UFix(2)))) + mem_reg_replay := replay_ex && !take_pc_wb; mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb when (ex_xcpt) { mem_reg_cause := ex_cause } mem_reg_div_val := ex_reg_div_val && io.dpath.div_rdy @@ -568,9 +571,16 @@ class Control(implicit conf: RocketConfiguration) extends Component (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), (mem_reg_mem_val && io.dmem.xcpt.ma.ld, UFix( 8)), (mem_reg_mem_val && io.dmem.xcpt.ma.st, UFix( 9)), - (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UFix(10)), - (mem_reg_mem_val && io.dmem.xcpt.pf.st, UFix(11)))) + (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UFix(10)), + (mem_reg_mem_val && io.dmem.xcpt.pf.st, UFix(11)))) + val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem + val ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen) + val replay_mem = ll_wb_kill_mem || mem_reg_replay || fpu_kill_mem + val killm_common = ll_wb_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid + ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem + + wb_reg_replay := replay_mem && !take_pc_wb wb_reg_xcpt := mem_xcpt && !take_pc_wb && !wb_reg_replay_next when (mem_xcpt) { wb_reg_cause := mem_cause } @@ -581,6 +591,7 @@ class Control(implicit conf: RocketConfiguration) extends Component wb_reg_fp_wen := Bool(false); wb_reg_eret := Bool(false); wb_reg_flush_inst := Bool(false); + wb_reg_mem_val := Bool(false) wb_reg_div_mul_val := Bool(false); wb_reg_fp_val := Bool(false) wb_reg_replay_next := Bool(false) @@ -592,77 +603,45 @@ class Control(implicit conf: RocketConfiguration) extends Component wb_reg_fp_wen := mem_reg_fp_wen; wb_reg_eret := mem_reg_eret && !mem_reg_replay wb_reg_flush_inst := mem_reg_flush_inst; + wb_reg_mem_val := mem_reg_mem_val wb_reg_div_mul_val := mem_reg_div_val || mem_reg_mul_val wb_reg_fp_val := mem_reg_fp_val wb_reg_replay_next := mem_reg_replay_next } - val sboard = new rocketCtrlSboard(32, 3, 2); - sboard.io.r(0).addr := id_raddr2.toUFix; - sboard.io.r(1).addr := id_raddr1.toUFix; - sboard.io.r(2).addr := id_waddr.toUFix; + val replay_wb = io.dmem.resp.bits.nack || wb_reg_replay || vec_replay || io.dpath.pcr_replay - // scoreboard set (for D$ misses, div, mul) - sboard.io.w(0).en := wb_reg_div_mul_val || wb_reg_dcache_miss && wb_reg_wen - sboard.io.w(0).data := Bool(true) - sboard.io.w(0).addr := io.dpath.wb_waddr + class Scoreboard + { + val r = Reg(resetVal = Bits(0)) + var next = r + var ens = Bool(false) + def apply(addr: UFix) = r(addr) + def set(en: Bool, addr: UFix): Unit = update(en, next | mask(en, addr)) + def clear(en: Bool, addr: UFix): Unit = update(en, next & ~mask(en, addr)) + private def mask(en: Bool, addr: UFix) = Mux(en, UFix(1) << addr, UFix(0)) + private def update(en: Bool, update: Bits) = { + next = update + ens = ens || en + when (ens) { r := next } + } + } - sboard.io.w(1).en := io.dpath.sboard_clr - sboard.io.w(1).data := Bool(false) - sboard.io.w(1).addr := io.dpath.sboard_clra + val sboard = new Scoreboard + sboard.set(wb_reg_div_mul_val || wb_dcache_miss && io.dpath.wb_wen, io.dpath.wb_waddr) + sboard.clear(io.dpath.mem_ll_wb, io.dpath.mem_ll_waddr) - val id_stall_raddr2 = id_renx2.toBool && sboard.io.r(0).data - val id_stall_raddr1 = id_renx1.toBool && sboard.io.r(1).data - val id_stall_waddr = id_wen.toBool && sboard.io.r(2).data - - var id_stall_fpu = Bool(false) - if (HAVE_FPU) { - val fp_sboard = new rocketCtrlSboard(32, 4, 3); - fp_sboard.io.r(0).addr := id_raddr1.toUFix - fp_sboard.io.r(1).addr := id_raddr2.toUFix - fp_sboard.io.r(2).addr := id_raddr3.toUFix - fp_sboard.io.r(3).addr := id_waddr.toUFix - - fp_sboard.io.w(0).en := wb_reg_dcache_miss && wb_reg_fp_wen || io.fpu.sboard_set - fp_sboard.io.w(0).data := Bool(true) - fp_sboard.io.w(0).addr := io.dpath.fp_sboard_wb_waddr - - fp_sboard.io.w(1).en := io.dpath.fp_sboard_clr - fp_sboard.io.w(1).data := Bool(false) - fp_sboard.io.w(1).addr := io.dpath.fp_sboard_clra - - fp_sboard.io.w(2).en := io.fpu.sboard_clr - fp_sboard.io.w(2).data := Bool(false) - fp_sboard.io.w(2).addr := io.fpu.sboard_clra - - id_stall_fpu = io.fpu.dec.ren1 && fp_sboard.io.r(0).data || - io.fpu.dec.ren2 && fp_sboard.io.r(1).data || - io.fpu.dec.ren3 && fp_sboard.io.r(2).data || - io.fpu.dec.wen && fp_sboard.io.r(3).data - } - - // replay inst in ex stage - val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || - ex_reg_mem_val && !io.dmem.req.ready || - ex_reg_div_val && !io.dpath.div_rdy || - ex_reg_mul_val && !io.dpath.mul_rdy || - mem_reg_replay_next - ctrl_killx := take_pc_wb || replay_ex - - // replay inst in mem stage - val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val - val dmem_kill_mem = mem_reg_valid && io.dmem.resp.bits.nack - val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem - val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem - val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_reg_xcpt || !mem_reg_valid - ctrl_killm := killm_common || mem_xcpt || dmem_kill_mem || fpu_kill_mem - - mem_reg_replay := replay_ex && !take_pc_wb; - - wb_reg_replay := replay_mem && !take_pc_wb - - val replay_wb = wb_reg_replay || vec_replay || io.dpath.pcr_replay + val id_stall_fpu = if (HAVE_FPU) { + val fp_sboard = new Scoreboard + fp_sboard.set(wb_dcache_miss && wb_reg_fp_wen && !replay_wb || io.fpu.sboard_set, io.dpath.wb_waddr) + fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) + fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) + io.fpu.dec.ren1 && fp_sboard(id_raddr1) || + io.fpu.dec.ren2 && fp_sboard(id_raddr2) || + io.fpu.dec.ren3 && fp_sboard(id_raddr3) || + io.fpu.dec.wen && fp_sboard(id_waddr) + } else Bool(false) // write cause to PCR on an exception io.dpath.exception := wb_reg_xcpt @@ -671,7 +650,7 @@ class Control(implicit conf: RocketConfiguration) extends Component io.dpath.vec_irq_aux_wen := wb_reg_xcpt && wb_reg_cause >= UFix(24) && wb_reg_cause < UFix(32) // control transfer from ex/wb - take_pc_wb := wb_reg_replay || vec_replay || wb_reg_xcpt || wb_reg_eret + take_pc_wb := replay_wb || wb_reg_xcpt || wb_reg_eret take_pc := take_pc_ex || take_pc_wb; io.dpath.sel_pc := @@ -700,7 +679,7 @@ class Control(implicit conf: RocketConfiguration) extends Component fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) // stall for RAW/WAW hazards on LB/LH and mul/div in memory stage. - val mem_mem_cmd_bh = + val mem_mem_cmd_bh = if (conf.fastLoadByte) Bool(false) else (mem_reg_mem_type === MT_B) || (mem_reg_mem_type === MT_BU) || (mem_reg_mem_type === MT_H) || (mem_reg_mem_type === MT_HU) val data_hazard_mem = mem_reg_wen && @@ -718,20 +697,24 @@ class Control(implicit conf: RocketConfiguration) extends Component // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. val data_hazard_wb = wb_reg_wen && - (id_raddr1 != UFix(0) && id_renx1 && id_raddr1 === io.dpath.wb_waddr || - id_raddr2 != UFix(0) && id_renx2 && id_raddr2 === io.dpath.wb_waddr || - id_waddr != UFix(0) && id_wen && id_waddr === io.dpath.wb_waddr) + (id_raddr1 != UFix(0) && id_renx1 && (id_raddr1 === io.dpath.wb_waddr) || + id_raddr2 != UFix(0) && id_renx2 && (id_raddr2 === io.dpath.wb_waddr) || + id_waddr != UFix(0) && id_wen && (id_waddr === io.dpath.wb_waddr)) val fp_data_hazard_wb = wb_reg_fp_wen && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.wb_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr || io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr) - val id_wb_hazard = data_hazard_wb && (wb_reg_dcache_miss || wb_reg_div_mul_val) || - fp_data_hazard_wb && (wb_reg_dcache_miss || wb_reg_fp_val) + val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val) || + fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) + + val id_sboard_hazard = + (id_raddr1 != UFix(0) && id_renx1 && sboard(id_raddr1) || + id_raddr2 != UFix(0) && id_renx2 && sboard(id_raddr2) || + id_waddr != UFix(0) && id_wen && sboard(id_waddr)) val ctrl_stalld = - id_ex_hazard || id_mem_hazard || id_wb_hazard || - id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || + id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || id_fp_val && id_stall_fpu || id_mem_val && !io.dmem.req.ready || vec_stalld @@ -742,6 +725,7 @@ class Control(implicit conf: RocketConfiguration) extends Component io.imem.invalidate := wb_reg_flush_inst io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen + io.dpath.wb_load := wb_reg_mem_val && io.dpath.wb_wen io.dpath.ren2 := id_renx2.toBool; io.dpath.ren1 := id_renx1.toBool; io.dpath.sel_alu2 := id_sel_alu2.toUFix @@ -758,13 +742,14 @@ class Control(implicit conf: RocketConfiguration) extends Component io.dpath.ex_jalr := ex_reg_jalr io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; - io.dpath.wb_wen := wb_reg_wen; - io.dpath.wb_valid := wb_reg_valid && !vec_replay + io.dpath.wb_wen := wb_reg_wen && !replay_wb + io.dpath.wb_valid := wb_reg_valid && !replay_wb io.dpath.sel_wa := id_sel_wa.toBool; io.dpath.sel_wb := id_sel_wb.toUFix io.dpath.pcr := wb_reg_pcr.toUFix - io.dpath.wb_eret := wb_reg_eret; + io.dpath.eret := wb_reg_eret io.dpath.ex_mem_type := ex_reg_mem_type + io.dpath.ex_br_type := ex_reg_br_type io.fpu.valid := !ctrl_killd && id_fp_val io.fpu.killx := ctrl_killx diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala deleted file mode 100644 index 34b6d40b..00000000 --- a/rocket/src/main/scala/ctrl_util.scala +++ /dev/null @@ -1,32 +0,0 @@ -package rocket - -import Chisel._ -import Node._ - -class rocketCtrlSboard(entries: Int, nread: Int, nwrite: Int) extends Component -{ - class read_port extends Bundle { - val addr = UFix(INPUT, log2Up(entries)) - val data = Bool(OUTPUT) - } - class write_port extends Bundle { - val en = Bool(INPUT) - val addr = UFix(INPUT, log2Up(entries)) - val data = Bool(INPUT) - } - - val io = new Bundle { - val r = Vec(nread) { new read_port() } - val w = Vec(nwrite) { new write_port() } - } - - val busybits = Reg(resetVal = Bits(0, entries)) - - val wmasks = (0 until nwrite).map(i => Fill(entries, io.w(i).en) & (UFix(1) << io.w(i).addr)) - val wdatas = (0 until nwrite).map(i => Mux(io.w(i).data, wmasks(i), UFix(0))) - var next = busybits & ~wmasks.reduceLeft(_|_) | wdatas.reduceLeft(_|_) - busybits := next - - for (i <- 0 until nread) - io.r(i).data := busybits(io.r(i).addr) -} diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 4e6bcb7c..3979bef0 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -4,6 +4,7 @@ import Chisel._ import Node._ import Constants._ import Instructions._ +import Util._ import hwacha._ class Datapath(implicit conf: RocketConfiguration) extends Component @@ -21,13 +22,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val vec_imul_resp = Bits(INPUT, hwacha.Constants.SZ_XLEN) } - val pcr = new rocketDpathPCR(); - val ex_pcr = pcr.io.r.data; - - val alu = new ALU - val ex_alu_out = alu.io.out; - val ex_alu_adder_out = alu.io.adder_out; - // execute definitions val ex_reg_pc = Reg() { UFix() }; val ex_reg_inst = Reg() { Bits() }; @@ -59,7 +53,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val wb_reg_rs2 = Reg() { Bits() }; val wb_reg_waddr = Reg() { UFix() } val wb_reg_wdata = Reg() { Bits() } - val wb_reg_dmem_wdata = Reg() { Bits() } val wb_reg_vec_waddr = Reg() { UFix() } val wb_reg_vec_wdata = Reg() { Bits() } val wb_reg_raddr1 = Reg() { UFix() }; @@ -67,25 +60,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val wb_reg_ll_wb = Reg(resetVal = Bool(false)); val wb_wdata = Bits(); - val dmem_resp_replay = Bool() - val r_dmem_resp_replay = Reg(resetVal = Bool(false)); - val r_dmem_fp_replay = Reg(resetVal = Bool(false)); - val r_dmem_resp_waddr = Reg() { UFix() }; - - val ex_pc_plus4 = ex_reg_pc + UFix(4); - val ex_branch_target = ex_reg_pc + Cat(ex_reg_op2(VADDR_BITS-1,0), Bits(0,1)).toUFix - - val ex_ea_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0)) - val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix - - // hook up I$ - io.imem.req.bits.currentpc := ex_reg_pc - io.imem.req.bits.pc := - Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, - Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), - Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec).toUFix, - wb_reg_pc))) // PC_WB - // instruction decode stage val id_inst = io.imem.resp.bits.data val id_pc = io.imem.resp.bits.pc @@ -159,16 +133,16 @@ class Datapath(implicit conf: RocketConfiguration) extends Component when (io.ctrl.ren2) { ex_reg_rs2 := id_rs2 } } - val ex_rs1 = Mux(Reg(id_rs1_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs1) - val ex_rs2 = Mux(Reg(id_rs2_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs2) - val ex_op2 = Mux(Reg(id_op2_dmem_bypass), wb_reg_dmem_wdata, ex_reg_op2) + val dmem_resp_data = if (conf.fastLoadByte) io.dmem.resp.bits.data_subword else io.dmem.resp.bits.data + val ex_rs1 = Mux(Reg(id_rs1_dmem_bypass), dmem_resp_data, ex_reg_rs1) + val ex_rs2 = Mux(Reg(id_rs2_dmem_bypass), dmem_resp_data, ex_reg_rs2) + val ex_op2 = Mux(Reg(id_op2_dmem_bypass), dmem_resp_data, ex_reg_op2) - alu.io.dw := ex_reg_ctrl_fn_dw; - alu.io.fn := ex_reg_ctrl_fn_alu; - alu.io.in2 := ex_op2.toUFix - alu.io.in1 := ex_rs1.toUFix - - io.fpu.fromint_data := ex_rs1 + val alu = new ALU + alu.io.dw := ex_reg_ctrl_fn_dw; + alu.io.fn := ex_reg_ctrl_fn_alu; + alu.io.in2 := ex_op2.toUFix + alu.io.in1 := ex_rs1.toUFix // divider val div = new rocketDivider(earlyOut = true) @@ -178,7 +152,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component div.io.req.bits.in1 := ex_op2 div.io.req_tag := ex_reg_waddr div.io.req_kill := io.ctrl.div_kill - div.io.resp_rdy := !dmem_resp_replay + div.io.resp_rdy := Bool(true) io.ctrl.div_rdy := div.io.req.ready io.ctrl.div_result_val := div.io.resp_val @@ -197,11 +171,23 @@ class Datapath(implicit conf: RocketConfiguration) extends Component mul_io.req.bits.in1 := ex_op2 mul_io.req_tag := ex_reg_waddr mul_io.req_kill := io.ctrl.mul_kill - mul_io.resp_rdy := !dmem_resp_replay && !div.io.resp_val + mul_io.resp_rdy := Bool(true) io.ctrl.mul_rdy := mul_io.req.ready io.ctrl.mul_result_val := mul_io.resp_val - io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection & bypass control + io.fpu.fromint_data := ex_rs1 + io.ctrl.ex_waddr := ex_reg_waddr + + def vaSign(a0: Bits, ea: Bits) = { + // efficient means to compress 64-bit VA into VADDR_BITS+1 bits + // (VA is bad if VA(VADDR_BITS) != VA(VADDR_BITS-1)) + val a = a0 >> VADDR_BITS-1 + val e = ea(VADDR_BITS,VADDR_BITS-1) + Mux(a === UFix(0) || a === UFix(1), e != UFix(0), + Mux(a === Fix(-1) || a === Fix(-2), e === Fix(-1), + Bool(false))) + } + val ex_effective_address = Cat(vaSign(ex_rs1, alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUFix // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module @@ -211,15 +197,14 @@ class Datapath(implicit conf: RocketConfiguration) extends Component require(io.dmem.req.bits.tag.getWidth >= 6) // processor control regfile read + val pcr = new rocketDpathPCR() pcr.io.r.en := io.ctrl.pcr != PCR_N pcr.io.r.addr := wb_reg_raddr1 pcr.io.host <> io.host - - io.ctrl.irq_timer := pcr.io.irq_timer; - io.ctrl.irq_ipi := pcr.io.irq_ipi; - io.ctrl.status := pcr.io.status; - io.ctrl.pcr_replay := pcr.io.replay + pcr.io <> io.ctrl + pcr.io.pc := wb_reg_pc + io.ctrl.pcr_replay := pcr.io.replay io.ptw.ptbr := pcr.io.ptbr io.ptw.invalidate := pcr.io.ptbr_wen @@ -227,11 +212,17 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // branch resolution logic io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0) - io.ctrl.br_eq := (ex_rs1 === ex_rs2) - io.ctrl.br_ltu := (ex_rs1.toUFix < ex_rs2.toUFix) - io.ctrl.br_lt := - (~(ex_rs1(63) ^ ex_rs2(63)) & io.ctrl.br_ltu | - ex_rs1(63) & ~ex_rs2(63)).toBool + io.ctrl.ex_br_taken := + Mux(io.ctrl.ex_br_type === BR_EQ, ex_rs1 === ex_rs2, + Mux(io.ctrl.ex_br_type === BR_NE, ex_rs1 != ex_rs2, + Mux(io.ctrl.ex_br_type === BR_LT, ex_rs1.toFix < ex_rs2.toFix, + Mux(io.ctrl.ex_br_type === BR_GE, ex_rs1.toFix >= ex_rs2.toFix, + Mux(io.ctrl.ex_br_type === BR_LTU, ex_rs1 < ex_rs2, + Mux(io.ctrl.ex_br_type === BR_GEU, ex_rs1 >= ex_rs2, + io.ctrl.ex_br_type === BR_J)))))) + + val ex_pc_plus4 = ex_reg_pc + UFix(4) + val ex_branch_target = ex_reg_pc + Cat(ex_reg_op2(VADDR_BITS-1,0), Bits(0,1)).toUFix // time stamp counter val tsc_reg = Reg(resetVal = UFix(0,64)); @@ -245,7 +236,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component Mux(ex_reg_ctrl_sel_wb === WB_PC, ex_pc_plus4.toFix, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, - ex_alu_out))).toBits // WB_ALU + alu.io.out))).toBits // WB_ALU // memory stage mem_reg_kill := ex_reg_kill @@ -262,23 +253,29 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // for load/use hazard detection (load byte/halfword) io.ctrl.mem_waddr := mem_reg_waddr; - // 32/64 bit load handling (moved to earlier in file) - // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool val dmem_resp_waddr = io.dmem.resp.bits.tag.toUFix >> UFix(1) - dmem_resp_replay := io.dmem.resp.bits.replay && dmem_resp_xpu; - r_dmem_resp_replay := dmem_resp_replay - r_dmem_resp_waddr := dmem_resp_waddr - r_dmem_fp_replay := io.dmem.resp.bits.replay && dmem_resp_fpu; + val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu - val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr, - Mux(div.io.resp_val, div.io.resp_tag, - mul_io.resp_tag)) - val mem_ll_wdata = Mux(div.io.resp_val, div.io.resp_bits, - mul_io.resp_bits) - val mem_ll_wb = dmem_resp_replay || div.io.resp_val || mul_io.resp_val + val mem_ll_wdata = Bits() + mem_ll_wdata := mul_io.resp_bits + io.ctrl.mem_ll_waddr := mul_io.resp_tag + io.ctrl.mem_ll_wb := mul_io.resp_val + when (div.io.resp_val) { + mul_io.resp_rdy := Bool(false) + mem_ll_wdata := div.io.resp_bits + io.ctrl.mem_ll_waddr := div.io.resp_tag + io.ctrl.mem_ll_wb := Bool(true) + } + when (dmem_resp_replay) { + mul_io.resp_rdy := Bool(false) + div.io.resp_rdy := Bool(false) + mem_ll_wdata := io.dmem.resp.bits.data_subword + io.ctrl.mem_ll_waddr := dmem_resp_waddr + io.ctrl.mem_ll_wb := Bool(true) + } io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu io.fpu.dmem_resp_data := io.dmem.resp.bits.data @@ -286,9 +283,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.fpu.dmem_resp_tag := dmem_resp_waddr // writeback stage - when (io.ctrl.mem_load) { - wb_reg_dmem_wdata := io.dmem.resp.bits.data - } when (!mem_reg_kill) { wb_reg_pc := mem_reg_pc wb_reg_inst := mem_reg_inst @@ -300,15 +294,12 @@ class Datapath(implicit conf: RocketConfiguration) extends Component wb_reg_waddr := mem_reg_waddr wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata) } - wb_reg_ll_wb := mem_ll_wb - when (mem_ll_wb) { - wb_reg_waddr := mem_ll_waddr + wb_reg_ll_wb := io.ctrl.mem_ll_wb + when (io.ctrl.mem_ll_wb) { + wb_reg_waddr := io.ctrl.mem_ll_waddr wb_reg_wdata := mem_ll_wdata } - // regfile write - val wb_src_dmem = Reg(io.ctrl.mem_load) && io.ctrl.wb_valid || r_dmem_resp_replay - if (HAVE_VEC) { // vector datapath @@ -333,7 +324,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component wb_wdata := Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), - Mux(wb_src_dmem, io.dmem.resp.bits.data_subword, + Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword, wb_reg_wdata)) } else @@ -344,7 +335,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component pcr.io.vec_nfregs := UFix(0) wb_wdata := - Mux(wb_src_dmem, io.dmem.resp.bits.data_subword, + Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword, wb_reg_wdata) } @@ -355,14 +346,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component when (rf_wen) { writeRF(rf_waddr, rf_wdata) } io.ctrl.wb_waddr := wb_reg_waddr - io.ctrl.mem_wb := dmem_resp_replay; // scoreboard clear (for div/mul and D$ load miss writebacks) - io.ctrl.sboard_clr := mem_ll_wb - io.ctrl.sboard_clra := mem_ll_waddr - io.ctrl.fp_sboard_clr := r_dmem_fp_replay - io.ctrl.fp_sboard_clra := r_dmem_resp_waddr - io.ctrl.fp_sboard_wb_waddr := Reg(mem_reg_waddr) + io.ctrl.fp_sboard_clr := io.dmem.resp.bits.replay && dmem_resp_fpu + io.ctrl.fp_sboard_clra := dmem_resp_waddr // processor control regfile write pcr.io.w.addr := wb_reg_raddr1 @@ -371,10 +358,11 @@ class Datapath(implicit conf: RocketConfiguration) extends Component Mux(io.ctrl.pcr === PCR_C, pcr.io.r.data & ~wb_reg_wdata, wb_reg_wdata)) - pcr.io.eret := io.ctrl.wb_eret; - pcr.io.exception := io.ctrl.exception; - pcr.io.cause := io.ctrl.cause; - pcr.io.pc := wb_reg_pc; - pcr.io.badvaddr_wen := io.ctrl.badvaddr_wen; - pcr.io.vec_irq_aux_wen := io.ctrl.vec_irq_aux_wen + // hook up I$ + io.imem.req.bits.currentpc := ex_reg_pc + io.imem.req.bits.pc := + Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, + Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), + Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec).toUFix, + wb_reg_pc))) // PC_WB } diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 3ea883db..9e2961fe 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -183,7 +183,7 @@ object RegEn when (en) { r := data } r } - def apply[T <: Bits](data: T, en: Bool, resetVal: Bool) = { + def apply[T <: Bits](data: T, en: Bool, resetVal: T) = { val r = Reg(resetVal = resetVal) { data.clone } when (en) { r := data } r @@ -478,15 +478,10 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component val wb_ctrl = RegEn(mem_ctrl, mem_reg_valid) // load response - val load_wb = Reg(io.dpath.dmem_resp_val, resetVal = Bool(false)) - val load_wb_single = Reg() { Bool() } - val load_wb_data = Reg() { Bits(width = 64) } // XXX WTF why doesn't bit width inference work for the regfile?! - val load_wb_tag = Reg() { UFix() } - when (io.dpath.dmem_resp_val) { - load_wb_single := io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU - load_wb_data := io.dpath.dmem_resp_data - load_wb_tag := io.dpath.dmem_resp_tag - } + val load_wb = io.dpath.dmem_resp_val + val load_wb_single = io.dpath.dmem_resp_type === MT_W + val load_wb_data = io.dpath.dmem_resp_data + val load_wb_tag = io.dpath.dmem_resp_tag val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) val load_wb_data_recoded = Mux(load_wb_single, Cat(Fix(-1, 32), rec_s), rec_d) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 5c9bed76..5db2faf0 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1,6 +1,7 @@ package rocket import Chisel._ +import Node._ import Constants._ import uncore._ import Util._ @@ -78,66 +79,46 @@ case class LoadGen(typ: Bits, addr: Bits, dat: Bits) } class MSHRReq(implicit conf: DCacheConfig) extends Bundle { - val tag_miss = Bool() val old_dirty = Bool() val old_tag = Bits(width = conf.tagbits) - val tag = Bits(width = conf.tagbits) - val idx = Bits(width = conf.idxbits) - val way_oh = Bits(width = conf.ways) + val way_en = Bits(width = conf.ways) - val offset = Bits(width = conf.offbits) - val cmd = Bits(width = 4) - val typ = Bits(width = 3) - val cpu_tag = Bits(width = conf.reqtagbits) - val data = Bits(width = conf.databits) + val addr = UFix(width = conf.paddrbits) + val cmd = Bits(width = 4) + val typ = Bits(width = 3) + val tag = Bits(width = conf.reqtagbits) + val data = Bits(width = conf.databits) override def clone = new MSHRReq().asInstanceOf[this.type] } -class RPQEntry(implicit conf: DCacheConfig) extends Bundle { - val offset = Bits(width = conf.offbits) - val cmd = Bits(width = 4) - val typ = Bits(width = 3) +class Replay(implicit conf: DCacheConfig) extends HellaCacheReq { val sdq_id = UFix(width = log2Up(conf.nsdq)) - val cpu_tag = Bits(width = conf.reqtagbits) - - override def clone = new RPQEntry().asInstanceOf[this.type] -} - -class Replay(implicit conf: DCacheConfig) extends RPQEntry { - val idx = Bits(width = conf.idxbits) - val way_oh = Bits(width = conf.ways) override def clone = new Replay().asInstanceOf[this.type] } -class DataReq(implicit conf: DCacheConfig) extends Bundle { - val idx = Bits(width = conf.idxbits) - val offset = Bits(width = conf.offbits) - val cmd = Bits(width = 4) - val typ = Bits(width = 3) - val data = Bits(width = conf.databits) - val way_oh = Bits(width = conf.ways) +class DataReadReq(implicit conf: DCacheConfig) extends Bundle { + val way_en = Bits(width = conf.ways) + val addr = Bits(width = conf.untagbits) - override def clone = new DataReq().asInstanceOf[this.type] + override def clone = new DataReadReq().asInstanceOf[this.type] } -class DataArrayReq(implicit conf: DCacheConfig) extends Bundle { +class DataWriteReq(implicit conf: DCacheConfig) extends Bundle { val way_en = Bits(width = conf.ways) - val idx = Bits(width = conf.idxbits) - val offset = Bits(width = log2Up(REFILL_CYCLES)) - val rw = Bool() - val wmask = Bits(width = MEM_DATA_BITS/8) + val addr = Bits(width = conf.untagbits) + val wmask = Bits(width = MEM_DATA_BITS/conf.databits) val data = Bits(width = MEM_DATA_BITS) - override def clone = new DataArrayReq().asInstanceOf[this.type] + override def clone = new DataWriteReq().asInstanceOf[this.type] } class WritebackReq(implicit conf: DCacheConfig) extends Bundle { val tag = Bits(width = conf.tagbits) val idx = Bits(width = conf.idxbits) - val way_oh = Bits(width = conf.ways) + val way_en = Bits(width = conf.ways) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) override def clone = new WritebackReq().asInstanceOf[this.type] @@ -169,14 +150,12 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val req_sdq_id = UFix(INPUT, log2Up(conf.nsdq)) val idx_match = Bool(OUTPUT) - val idx = Bits(OUTPUT, conf.idxbits) - val refill_count = Bits(OUTPUT, log2Up(REFILL_CYCLES)) val tag = Bits(OUTPUT, conf.tagbits) - val way_oh = Bits(OUTPUT, conf.ways) val mem_req = (new FIFOIO) { new TransactionInit } + val mem_resp = new DataWriteReq().asOutput val meta_req = (new FIFOIO) { new MetaArrayReq() } - val replay = (new FIFOIO) { new Replay() } + val replay = (new FIFOIO) { new Replay() } val mem_abort = (new PipeIO) { new TransactionAbort }.flip val mem_rep = (new PipeIO) { new TransactionReply }.flip val mem_finish = (new FIFOIO) { new TransactionFinish } @@ -185,9 +164,8 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val probe_refill = (new FIFOIO) { Bool() }.flip } - val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_drain_rpq :: Nil = Enum(7) { UFix() } + val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write :: s_drain_rpq :: Nil = Enum(8) { UFix() } val state = Reg(resetVal = s_invalid) - val flush = Reg { Bool() } val xacx_type = Reg { UFix() } val line_state = Reg { UFix() } @@ -195,15 +173,16 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val req = Reg { new MSHRReq() } val req_cmd = io.req_bits.cmd - val req_use_rpq = (req_cmd != M_PFR) && (req_cmd != M_PFW) && (req_cmd != M_FLA) - val idx_match = req.idx === io.req_bits.idx - val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) + val req_use_rpq = req_cmd != M_PFR && req_cmd != M_PFW + val req_idx = req.addr(conf.untagbits-1,conf.offbits) + val idx_match = req_idx === io.req_bits.addr(conf.untagbits-1,conf.offbits) + val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) - val rpq = (new Queue(conf.nrpq)) { new RPQEntry } + val rpq = (new Queue(conf.nrpq)) { new Replay } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq rpq.io.enq.bits := io.req_bits rpq.io.enq.bits.sdq_id := io.req_sdq_id - rpq.io.deq.ready := io.replay.ready && (state === s_drain_rpq) || (state === s_invalid) + rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid val abort = io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(id) val reply = io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(id) @@ -214,11 +193,14 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { finish_q.io.enq.valid := wb_done || refill_done finish_q.io.enq.bits.global_xact_id := io.mem_rep.bits.global_xact_id - when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid && io.meta_req.ready) { + when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid) { state := s_invalid } + when (state === s_meta_write && io.meta_req.ready) { + state := s_drain_rpq + } when (state === s_refill_resp) { - when (refill_done) { state := s_drain_rpq } + when (refill_done) { state := s_meta_write } when (reply) { refill_count := refill_count + UFix(1) line_state := conf.co.newStateOnTransactionReply(io.mem_rep.bits, io.mem_req.bits) @@ -226,8 +208,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { when (abort) { state := s_refill_req } } when (state === s_refill_req) { - when (flush) { state := s_drain_rpq } - .elsewhen (abort) { state := s_refill_req } + when (abort) { state := s_refill_req } .elsewhen (io.mem_req.ready) { state := s_refill_resp } } when (state === s_meta_clear && io.meta_req.ready) { @@ -246,51 +227,64 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { xacx_type := conf.co.getTransactionInitTypeOnSecondaryMiss(req_cmd, conf.co.newStateOnFlush(), io.mem_req.bits) } when ((state === s_invalid) && io.req_pri_val) { - flush := req_cmd === M_FLA line_state := conf.co.newStateOnFlush() refill_count := UFix(0) xacx_type := conf.co.getTransactionInitTypeOnPrimaryMiss(req_cmd, conf.co.newStateOnFlush()) req := io.req_bits - - when (io.req_bits.tag_miss) { - state := Mux(io.req_bits.old_dirty, s_wb_req, s_refill_req) - } + state := Mux(io.req_bits.old_dirty, s_wb_req, s_refill_req) } io.idx_match := (state != s_invalid) && idx_match - io.idx := req.idx - io.tag := req.tag - io.way_oh := req.way_oh - io.refill_count := refill_count + io.mem_resp := req + io.mem_resp.addr := Cat(req_idx, refill_count) << conf.ramoffbits + io.tag := req.addr >> conf.untagbits io.req_pri_rdy := (state === s_invalid) io.req_sec_rdy := sec_rdy && rpq.io.enq.ready - io.meta_req.valid := (state === s_drain_rpq) && !rpq.io.deq.valid && !finish_q.io.deq.valid || (state === s_meta_clear) - io.meta_req.bits.rw := Bool(true) - io.meta_req.bits.idx := req.idx + io.meta_req.valid := state === s_meta_write || state === s_meta_clear || state === s_drain_rpq + io.meta_req.bits.rw := state != s_drain_rpq + io.meta_req.bits.idx := req_idx io.meta_req.bits.data.state := Mux(state === s_meta_clear, conf.co.newStateOnFlush(), line_state) - io.meta_req.bits.data.tag := req.tag - io.meta_req.bits.way_en := req.way_oh + io.meta_req.bits.data.tag := io.tag + io.meta_req.bits.way_en := req.way_en io.wb_req.valid := (state === s_wb_req) && !(io.probe_writeback.valid && idx_match) io.wb_req.bits.tag := req.old_tag - io.wb_req.bits.idx := req.idx - io.wb_req.bits.way_oh := req.way_oh + io.wb_req.bits.idx := req_idx + io.wb_req.bits.way_en := req.way_en io.wb_req.bits.tile_xact_id := Bits(id) io.probe_writeback.ready := (state != s_wb_resp && state != s_meta_clear && state != s_drain_rpq) || !idx_match io.probe_refill.ready := (state != s_refill_resp && state != s_drain_rpq) || !idx_match - io.mem_req.valid := (state === s_refill_req) && !flush + io.mem_req.valid := state === s_refill_req io.mem_req.bits.x_type := xacx_type - io.mem_req.bits.addr := Cat(req.tag, req.idx).toUFix + io.mem_req.bits.addr := Cat(io.tag, req_idx).toUFix io.mem_req.bits.tile_xact_id := Bits(id) io.mem_finish <> finish_q.io.deq - io.replay.valid := (state === s_drain_rpq) && rpq.io.deq.valid - io.replay.bits <> rpq.io.deq.bits - io.replay.bits.idx := req.idx - io.replay.bits.way_oh := req.way_oh + io.replay.valid := state === s_drain_rpq && rpq.io.deq.valid + io.replay.bits := rpq.io.deq.bits + io.replay.bits.phys := Bool(true) + io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(conf.offbits-1,0)).toUFix + + // don't issue back-to-back replays with store->load dependence + val r1_replay_valid = Reg(rpq.io.deq.fire()) + val r2_replay_valid = Reg(r1_replay_valid) + val (r1_replay, r2_replay) = (Reg{new Replay}, Reg{new Replay}) + when (rpq.io.deq.fire()) { r1_replay := rpq.io.deq.bits } + when (r1_replay_valid) { r2_replay := r1_replay } + def offsetMatch(dst: HellaCacheReq, src: HellaCacheReq) = { + def mask(x: HellaCacheReq) = StoreGen(x.typ, x.addr, Bits(0)).mask + // TODO: this is overly restrictive + dst.addr(conf.offbits-1,conf.wordoffbits) === src.addr(conf.offbits-1,conf.wordoffbits) + // && (mask(dst) & mask(src)).orR + } + when (r1_replay_valid && offsetMatch(io.replay.bits, r1_replay) || + r2_replay_valid && offsetMatch(io.replay.bits, r2_replay)) { + rpq.io.deq.ready := Bool(false) + io.replay.bits.cmd := M_FENCE // NOP + } } class MSHRFile(implicit conf: DCacheConfig) extends Component { @@ -298,37 +292,30 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val req = (new FIFOIO) { new MSHRReq }.flip val secondary_miss = Bool(OUTPUT) - val mem_resp_idx = Bits(OUTPUT, conf.idxbits) - val mem_resp_offset = Bits(OUTPUT, log2Up(REFILL_CYCLES)) - val mem_resp_way_oh = Bits(OUTPUT, conf.ways) - - val fence_rdy = Bool(OUTPUT) - val mem_req = (new FIFOIO) { new TransactionInit } + val mem_resp = new DataWriteReq().asOutput val meta_req = (new FIFOIO) { new MetaArrayReq() } - val data_req = (new FIFOIO) { new DataReq() } + val replay = (new FIFOIO) { new Replay } val mem_abort = (new PipeIO) { new TransactionAbort }.flip val mem_rep = (new PipeIO) { new TransactionReply }.flip val mem_finish = (new FIFOIO) { new TransactionFinish } val wb_req = (new FIFOIO) { new WritebackReq } val probe = (new FIFOIO) { Bool() }.flip - val cpu_resp_val = Bool(OUTPUT) - val cpu_resp_tag = Bits(OUTPUT, conf.reqtagbits) + val fence_rdy = Bool(OUTPUT) } val sdq_val = Reg(resetVal = Bits(0, conf.nsdq)) val sdq_alloc_id = PriorityEncoder(~sdq_val(conf.nsdq-1,0)) val sdq_rdy = !sdq_val.andR - val (req_read, req_write) = cpuCmdToRW(io.req.bits.cmd) - val sdq_enq = io.req.valid && io.req.ready && req_write + val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd) val sdq = Mem(conf.nsdq) { io.req.bits.data.clone } when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } val idxMatch = Vec(conf.nmshr) { Bool() } val tagList = Vec(conf.nmshr) { Bits() } val wbTagList = Vec(conf.nmshr) { Bits() } - val memRespMux = Vec(conf.nmshr) { new DataArrayReq } + val memRespMux = Vec(conf.nmshr) { new DataWriteReq } val meta_req_arb = (new Arbiter(conf.nmshr)) { new MetaArrayReq() } val mem_req_arb = (new Arbiter(conf.nmshr)) { new TransactionInit } val mem_finish_arb = (new Arbiter(conf.nmshr)) { new TransactionFinish } @@ -336,8 +323,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } - val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.tag - val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.tag + val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> conf.untagbits + val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.addr >> conf.untagbits var idx_match = Bool(false) var pri_rdy = Bool(false) @@ -371,9 +358,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { mshr.io.mem_abort <> io.mem_abort mshr.io.mem_rep <> io.mem_rep - memRespMux(i).idx := mshr.io.idx - memRespMux(i).offset := mshr.io.refill_count - memRespMux(i).way_en := mshr.io.way_oh + memRespMux(i) := mshr.io.mem_resp pri_rdy = pri_rdy || mshr.io.req_pri_rdy sec_rdy = sec_rdy || mshr.io.req_sec_rdy @@ -392,27 +377,18 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.secondary_miss := idx_match - val memResp = memRespMux(io.mem_rep.bits.tile_xact_id) - io.mem_resp_idx := memResp.idx - io.mem_resp_offset := memResp.offset - io.mem_resp_way_oh := memResp.way_en + io.mem_resp := memRespMux(io.mem_rep.bits.tile_xact_id) io.fence_rdy := !fence io.probe.ready := (refill_probe_rdy || !tag_match) && (writeback_probe_rdy || !wb_probe_match) - val replay = Queue(replay_arb.io.out, 1, pipe = true) - replay.ready := io.data_req.ready - io.data_req <> replay + val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd) + io.replay.bits.data := sdq(RegEn(replay_arb.io.out.bits.sdq_id, free_sdq)) + io.replay <> replay_arb.io.out - val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd) - val sdq_free = replay.valid && replay.ready && replay_write - sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, conf.nsdq)) | - PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq && io.req.bits.tag_miss) - val sdq_rdata = Reg() { io.req.bits.data.clone } - sdq_rdata := sdq(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) - io.data_req.bits.data := sdq_rdata - - io.cpu_resp_val := Reg(replay.valid && replay.ready && replay_read, resetVal = Bool(false)) - io.cpu_resp_tag := Reg(replay.bits.cpu_tag) + when (io.replay.valid || sdq_enq) { + sdq_val := sdq_val & ~(UFixToOH(io.replay.bits.sdq_id) & Fill(conf.nsdq, free_sdq)) | + PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq) + } } @@ -420,7 +396,8 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new WritebackReq() }.flip val probe = (new FIFOIO) { new WritebackReq() }.flip - val data_req = (new FIFOIO) { new DataArrayReq() } + val meta_req = (new FIFOIO) { new MetaArrayReq } + val data_req = (new FIFOIO) { new DataReadReq() } val data_resp = Bits(INPUT, MEM_DATA_BITS) val mem_req = (new FIFOIO) { new TransactionInit } val mem_req_data = (new FIFOIO) { new TransactionInitData } @@ -430,6 +407,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { val valid = Reg(resetVal = Bool(false)) val is_probe = Reg() { Bool() } val data_req_fired = Reg(resetVal = Bool(false)) + val r_data_req_fired = Reg(data_req_fired, resetVal = Bool(false)) val cmd_sent = Reg() { Bool() } val cnt = Reg() { UFix(width = log2Up(REFILL_CYCLES+1)) } val req = Reg() { new WritebackReq() } @@ -439,7 +417,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { when (valid && io.mem_req.ready) { cmd_sent := Bool(true) } - when (io.data_req.valid && io.data_req.ready) { + when (io.data_req.fire()) { data_req_fired := Bool(true) cnt := cnt + UFix(1) } @@ -465,23 +443,26 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { req := io.req.bits } + val fire = valid && cnt < UFix(REFILL_CYCLES) io.req.ready := !valid && !io.probe.valid io.probe.ready := !valid - io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) - io.data_req.bits.way_en := req.way_oh - io.data_req.bits.idx := req.idx - io.data_req.bits.offset := cnt - io.data_req.bits.rw := Bool(false) - io.data_req.bits.wmask := Bits(0) - io.data_req.bits.data := Bits(0) + io.data_req.valid := fire && io.meta_req.ready + io.data_req.bits.way_en := req.way_en + io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(REFILL_CYCLES)-1,0)) << conf.ramoffbits + + io.meta_req.valid := fire && io.data_req.ready + io.meta_req.bits.way_en := Fix(-1) + io.meta_req.bits.rw := Bool(false) + io.meta_req.bits.idx := req.idx + io.meta_req.bits.data.tag := req.tag io.mem_req.valid := valid && !cmd_sent io.mem_req.bits.x_type := conf.co.getTransactionInitTypeOnWriteback() io.mem_req.bits.addr := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := req.tile_xact_id - io.mem_req_data.valid := data_req_fired && !is_probe + io.mem_req_data.valid := r_data_req_fired && !is_probe io.mem_req_data.bits.data := io.data_resp - io.probe_rep_data.valid := data_req_fired && is_probe + io.probe_rep_data.valid := r_data_req_fired && is_probe io.probe_rep_data.bits.data := io.data_resp } @@ -492,129 +473,93 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { val meta_req = (new FIFOIO) { new MetaArrayReq } val mshr_req = (new FIFOIO) { Bool() } val wb_req = (new FIFOIO) { new WritebackReq } - val tag_match_way_oh = Bits(INPUT, conf.ways) + val way_en = Bits(INPUT, conf.ways) val line_state = UFix(INPUT, 2) val addr = Bits(OUTPUT, conf.lineaddrbits) } - val s_reset :: s_invalid :: s_meta_req :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: Nil = Enum(8) { UFix() } - val state = Reg(resetVal = s_reset) + val s_invalid :: s_meta_req :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(8) { UFix() } + val state = Reg(resetVal = s_invalid) val line_state = Reg() { UFix() } - val way_oh = Reg() { Bits() } + val way_en = Reg() { Bits() } val req = Reg() { new ProbeRequest() } - val hit = way_oh.orR + val hit = way_en.orR - when ((state === s_writeback_resp) && io.wb_req.ready) { + when (state === s_meta_write && io.meta_req.ready) { state := s_invalid } - when ((state === s_writeback_req) && io.wb_req.ready) { + when (state === s_writeback_resp && io.wb_req.ready) { + state := s_meta_write + } + when (state === s_writeback_req && io.wb_req.ready) { state := s_writeback_resp } - when ((state === s_probe_rep) && io.meta_req.ready && io.rep.ready) { - state := Mux(hit && conf.co.needsWriteback(line_state), s_writeback_req, s_invalid) + when (state === s_probe_rep && io.rep.ready) { + state := s_invalid + when (hit) { + state := Mux(conf.co.needsWriteback(line_state), s_writeback_req, s_meta_write) + } } - when ((state === s_mshr_req) && io.mshr_req.ready) { - state := s_meta_req + when (state === s_mshr_req) { + state := s_probe_rep + line_state := io.line_state + way_en := io.way_en + when (!io.mshr_req.ready) { state := s_meta_req } } when (state === s_meta_resp) { - way_oh := io.tag_match_way_oh - line_state := io.line_state - state := Mux(!io.mshr_req.ready, s_mshr_req, s_probe_rep) + state := s_mshr_req } - when ((state === s_meta_req) && io.meta_req.ready) { + when (state === s_meta_req && io.meta_req.ready) { state := s_meta_resp } - when ((state === s_invalid) && io.req.valid) { + when (state === s_invalid && io.req.valid) { state := s_meta_req req := io.req.bits } - when (state === s_reset) { state := s_invalid } - io.req.ready := state === s_invalid - io.rep.valid := state === s_probe_rep && io.meta_req.ready + io.req.ready := state === s_invalid && !reset + io.rep.valid := state === s_probe_rep io.rep.bits := conf.co.newProbeReply(req, Mux(hit, line_state, conf.co.newStateOnFlush)) - io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_mshr_req || state === s_probe_rep && hit - io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, Fix(-1)) - io.meta_req.bits.rw := state === s_probe_rep + io.meta_req.valid := state === s_meta_req || state === s_meta_write + io.meta_req.bits.way_en := Mux(state === s_meta_write, way_en, Fix(-1)) + io.meta_req.bits.rw := state === s_meta_write io.meta_req.bits.idx := req.addr io.meta_req.bits.data.state := conf.co.newStateOnProbeRequest(req, line_state) io.meta_req.bits.data.tag := req.addr >> UFix(conf.idxbits) - io.mshr_req.valid := state === s_meta_resp || state === s_mshr_req + io.mshr_req.valid := state === s_mshr_req io.addr := req.addr io.wb_req.valid := state === s_writeback_req - io.wb_req.bits.way_oh := way_oh + io.wb_req.bits.way_en := way_en io.wb_req.bits.idx := req.addr io.wb_req.bits.tag := req.addr >> UFix(conf.idxbits) } -class FlushUnit(lines: Int)(implicit conf: DCacheConfig) extends Component { - val io = new Bundle { - val req = (new FIFOIO) { Bool() }.flip - val meta_req = (new FIFOIO) { new MetaArrayReq() } - val mshr_req = (new FIFOIO) { Bool() } - } - - val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: Nil = Enum(4) { UFix() } - val state = Reg(resetVal = s_reset) - val idx_cnt = Reg(resetVal = UFix(0, log2Up(lines))) - val next_idx_cnt = idx_cnt + UFix(1) - val way_cnt = if (conf.dm) UFix(0) else Reg(resetVal = UFix(0, conf.waybits)) - val next_way_cnt = way_cnt + UFix(1) - - switch (state) { - is(s_reset) { - when (io.meta_req.ready) { - state := Mux(way_cnt === UFix(conf.ways-1) && idx_cnt.andR, s_ready, s_reset); - when (way_cnt === UFix(conf.ways-1)) { idx_cnt := next_idx_cnt }; - if (!conf.dm) way_cnt := next_way_cnt; - } - } - is(s_ready) { when (io.req.valid) { state := s_meta_read } } - is(s_meta_read) { when (io.meta_req.ready) { state := s_meta_wait } } - is(s_meta_wait) { - state := s_meta_read - when (io.mshr_req.ready) { - state := s_meta_read - when (way_cnt === UFix(conf.ways-1)) { - when (idx_cnt.andR) { - state := s_ready - } - idx_cnt := next_idx_cnt - } - if (!conf.dm) way_cnt := next_way_cnt; - } - } - } - - io.req.ready := state === s_ready - io.mshr_req.valid := state === s_meta_wait - io.meta_req.valid := (state === s_meta_read) || (state === s_reset) - io.meta_req.bits.way_en := UFixToOH(way_cnt) - io.meta_req.bits.idx := idx_cnt - io.meta_req.bits.rw := (state === s_reset) - io.meta_req.bits.data.state := conf.co.newStateOnFlush() - io.meta_req.bits.data.tag := UFix(0) -} - -class MetaDataArrayArray(lines: Int)(implicit conf: DCacheConfig) extends Component { +class MetaDataArray(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new MetaArrayReq() }.flip val resp = Vec(conf.ways){ (new MetaData).asOutput } - val state_req = (new FIFOIO) { new MetaArrayReq() }.flip + val state_req = (new PipeIO) { new MetaArrayReq() }.flip val way_en = Bits(OUTPUT, conf.ways) } + val rst_cnt = Reg(resetVal = UFix(0, log2Up(conf.sets+1))) + val rst = rst_cnt < conf.sets + when (rst) { rst_cnt := rst_cnt+1 } + val permBits = io.req.bits.data.state.width - val perms = Mem(lines) { UFix(width = permBits*conf.ways) } - val tags = Mem(lines, seqRead = true) { Bits(width = conf.tagbits*conf.ways) } + val perms = Mem(conf.sets) { UFix(width = permBits*conf.ways) } + val tags = Mem(conf.sets, seqRead = true) { Bits(width = conf.tagbits*conf.ways) } val tag = Reg() { Bits() } val raddr = Reg() { Bits() } val way_en_ = Reg { Bits(width = conf.ways) } - when (io.state_req.valid && io.state_req.bits.rw) { - perms.write(io.state_req.bits.idx, Fill(conf.ways, io.state_req.bits.data.state), FillInterleaved(permBits, io.state_req.bits.way_en)) + when (rst || io.state_req.valid && io.state_req.bits.rw) { + val addr = Mux(rst, rst_cnt, io.state_req.bits.idx) + val data = Mux(rst, conf.co.newStateOnFlush, io.state_req.bits.data.state) + val mask = Mux(rst, Fix(-1), io.state_req.bits.way_en) + perms.write(addr, Fill(conf.ways, data), FillInterleaved(permBits, mask)) } when (io.req.valid) { when (io.req.bits.rw) { @@ -635,82 +580,71 @@ class MetaDataArrayArray(lines: Int)(implicit conf: DCacheConfig) extends Compon } io.way_en := way_en_ - io.req.ready := Bool(true) - io.state_req.ready := Bool(true) + io.req.ready := !rst } -class DataArray(lines: Int)(implicit conf: DCacheConfig) extends Component { +class DataArray(implicit conf: DCacheConfig) extends Component { val io = new Bundle { - val req = (new FIFOIO) { new DataArrayReq() }.flip - val resp = Bits(width = MEM_DATA_BITS, dir = OUTPUT) - } - - val wmask = FillInterleaved(8, io.req.bits.wmask) - val addr = Cat(io.req.bits.idx, io.req.bits.offset) - val rdata = Reg() { Bits() } - - val array = Mem(lines*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } - when (io.req.valid) { - when (io.req.bits.rw) { array.write(addr, io.req.bits.data, wmask) } - .otherwise { rdata := array(addr) } - } - - io.resp := rdata - io.req.ready := Bool(true) -} - -class DataArrayArray(lines: Int)(implicit conf: DCacheConfig) extends Component { - val io = new Bundle { - val req = (new FIFOIO) { new DataArrayReq() }.flip + val read = new FIFOIO()(new DataReadReq).flip + val write = new FIFOIO()(new DataWriteReq).flip val resp = Vec(conf.ways){ Bits(OUTPUT, MEM_DATA_BITS) } - val way_en = Bits(OUTPUT, conf.ways) } - val way_en_ = Reg { Bits(width = conf.ways) } - when (io.req.valid && io.req.ready) { - way_en_ := io.req.bits.way_en - } + val wmask = FillInterleaved(conf.databits, io.write.bits.wmask) + val waddr = io.write.bits.addr >> conf.ramoffbits + val raddr = io.read.bits.addr >> conf.ramoffbits for (w <- 0 until conf.ways) { - val way = new DataArray(lines) - way.io.req.bits <> io.req.bits - way.io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool - way.io.resp <> io.resp(w) + val rdata = Reg() { Bits() } + val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } + when (io.write.bits.way_en(w) && io.write.valid) { + array.write(waddr, io.write.bits.data, wmask) + } + when (io.read.bits.way_en(w) && io.read.valid) { + rdata := array(raddr) + } + io.resp(w) := rdata } - io.way_en := way_en_ - io.req.ready := Bool(true) + io.read.ready := Bool(true) + io.write.ready := Bool(true) } -class AMOALU extends Component { +class AMOALU(implicit conf: DCacheConfig) extends Component { val io = new Bundle { + val lhs_raw = Bits(INPUT, conf.databits) + val addr = Bits(INPUT, conf.offbits) val cmd = Bits(INPUT, 4) val typ = Bits(INPUT, 3) - val lhs = UFix(INPUT, 64) - val rhs = UFix(INPUT, 64) - val out = UFix(OUTPUT, 64) + val lhs = Bits(INPUT, conf.databits) + val rhs = Bits(INPUT, conf.databits) + val out = Bits(OUTPUT, conf.databits) } + + require(conf.databytes == 8) val sgned = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MAX) - val sub = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) || (io.cmd === M_XA_MAX) || (io.cmd === M_XA_MAXU) + val minmax = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) || (io.cmd === M_XA_MAX) || (io.cmd === M_XA_MAXU) val min = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) val word = (io.typ === MT_W) || (io.typ === MT_WU) - val adder_out = (Cat(io.lhs, UFix(0,1)).toUFix + Cat(io.rhs ^ Fill(io.rhs.width, sub), sub).toUFix) >> UFix(1) + val adder_out = io.lhs + io.rhs val cmp_lhs = Mux(word, io.lhs(31), io.lhs(63)) val cmp_rhs = Mux(word, io.rhs(31), io.rhs(63)) - val cmp_diff = Mux(word, adder_out(31), adder_out(63)) + val cmp_diff = Mux(word, io.lhs(31,0) < io.rhs(31,0), io.lhs < io.rhs) val less = Mux(cmp_lhs === cmp_rhs, cmp_diff, Mux(sgned, cmp_lhs, cmp_rhs)) val cmp_out = Mux(min === less, io.lhs, io.rhs) val out = Mux(io.cmd === M_XA_ADD, adder_out, - Mux(io.cmd === M_XA_SWAP, io.rhs, Mux(io.cmd === M_XA_AND, io.lhs & io.rhs, Mux(io.cmd === M_XA_OR, io.lhs | io.rhs, - /* MIN[U]/MAX[U] */ cmp_out)))); + Mux(minmax, cmp_out, + io.rhs)))) - io.out := Mux(word, Cat(out(31,0), out(31,0)).toUFix, out) + val wdata = Mux(word, Cat(out(31,0), out(31,0)), out) + val wmask = FillInterleaved(8, StoreGen(io.typ, io.addr, Bits(0)).mask) + io.out := wmask & wdata | ~wmask & io.lhs_raw } class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { @@ -726,13 +660,12 @@ class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { } class HellaCacheResp(implicit conf: DCacheConfig) extends Bundle { - val miss = Bool() - val nack = Bool() + val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() - val typ = Bits(width = 3) - val data = Bits(width = conf.databits) + val typ = Bits(width = 3) + val data = Bits(width = conf.databits) val data_subword = Bits(width = conf.databits) - val tag = Bits(width = conf.reqtagbits) + val tag = Bits(width = conf.reqtagbits) override def clone = new HellaCacheResp().asInstanceOf[this.type] } @@ -761,297 +694,241 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val mem = new ioTileLink } - val lines = 1 << conf.idxbits - val indexbits = conf.idxbits - val tagmsb = conf.paddrbits-1 - val taglsb = indexbits+conf.offbits - val tagbits = tagmsb-taglsb+1 - val indexmsb = taglsb-1 + val indexmsb = conf.untagbits-1 val indexlsb = conf.offbits val offsetmsb = indexlsb-1 val offsetlsb = log2Up(conf.databytes) - val ramindexlsb = log2Up(MEM_DATA_BITS/8) - - val early_nack = Reg { Bool() } - val r_cpu_req_val_ = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false)) - val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack - val r_cpu_req_phys = Reg() { Bool() } - val r_cpu_req_vpn = Reg() { UFix() } - val r_cpu_req_idx = Reg() { Bits() } - val r_cpu_req_cmd = Reg() { Bits() } - val r_cpu_req_type = Reg() { Bits() } - val r_cpu_req_tag = Reg() { Bits() } - val r_amo_replay_data = Reg() { Bits() } - val r_way_oh = Reg() { Bits() } - - val p_store_valid = Reg(resetVal = Bool(false)) - val p_store_data = Reg() { Bits() } - val p_store_idx = Reg() { Bits() } - val p_store_cmd = Reg() { Bits() } - val p_store_type = Reg() { Bits() } - val p_store_way_oh = Reg() { Bits() } - val r_replay_amo = Reg(resetVal = Bool(false)) - - val req_store = (io.cpu.req.bits.cmd === M_XWR) - val req_load = (io.cpu.req.bits.cmd === M_XRD) - val req_amo = io.cpu.req.bits.cmd(3).toBool - val req_read = req_load || req_amo - val req_write = req_store || req_amo - val r_req_load = (r_cpu_req_cmd === M_XRD) - val r_req_store = (r_cpu_req_cmd === M_XWR) - val r_req_flush = (r_cpu_req_cmd === M_FLA) - val r_req_fence = (r_cpu_req_cmd === M_FENCE) - val r_req_prefetch = (r_cpu_req_cmd === M_PFR) || (r_cpu_req_cmd === M_PFW) - val r_req_amo = r_cpu_req_cmd(3).toBool - val r_req_read = r_req_load || r_req_amo - val r_req_write = r_req_store || r_req_amo - val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch - val nack_hit = Bool() - - val dtlb = new TLB(8) - dtlb.io.ptw <> io.cpu.ptw - dtlb.io.req.valid := r_cpu_req_val && r_req_readwrite && !r_cpu_req_phys - dtlb.io.req.bits.passthrough := r_cpu_req_phys - dtlb.io.req.bits.asid := UFix(0) - dtlb.io.req.bits.vpn := r_cpu_req_vpn - dtlb.io.req.bits.instruction := Bool(false) val wb = new WritebackUnit val prober = new ProbeUnit val mshr = new MSHRFile - val flusher = new FlushUnit(lines) - val replay_amo_val = mshr.io.data_req.valid && mshr.io.data_req.bits.cmd(3).toBool - // reset and flush unit - val flushed = Reg(resetVal = Bool(true)) - flushed := flushed && (!r_cpu_req_val || r_req_flush) || r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && flusher.io.req.ready - flusher.io.req.valid := r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && !flushed - flusher.io.mshr_req.ready := mshr.io.req.ready + io.cpu.req.ready := Bool(true) + val s1_valid = Reg(io.cpu.req.fire(), resetVal = Bool(false)) + val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill + val s1_replay = Reg(resetVal = Bool(false)) + val s1_req = Reg{io.cpu.req.bits.clone} + val s2_req = Reg{io.cpu.req.bits.clone} + + val s2_valid = Reg(s1_valid_masked, resetVal = Bool(false)) + val s2_replay = Reg(s1_replay, resetVal = Bool(false)) + val s2_valid_masked = Bool() + val s2_nack_hit = Bool() + + val s3_valid = Reg(resetVal = Bool(false)) + val s3_req = Reg{io.cpu.req.bits.clone} + val s3_way = Reg{Bits()} + + val s1_read = isRead(s1_req.cmd) + val s1_write = isWrite(s1_req.cmd) + val s1_readwrite = s1_read || s1_write + + val dtlb = new TLB(8) + dtlb.io.ptw <> io.cpu.ptw + dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys + dtlb.io.req.bits.passthrough := s1_req.phys + dtlb.io.req.bits.asid := UFix(0) + dtlb.io.req.bits.vpn := s1_req.addr >> conf.pgidxbits + dtlb.io.req.bits.instruction := Bool(false) + when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) } when (io.cpu.req.valid) { - r_cpu_req_phys := io.cpu.req.bits.phys - r_cpu_req_vpn := io.cpu.req.bits.addr >> taglsb - r_cpu_req_idx := io.cpu.req.bits.addr(indexmsb,0) - r_cpu_req_cmd := io.cpu.req.bits.cmd - r_cpu_req_type := io.cpu.req.bits.typ - r_cpu_req_tag := io.cpu.req.bits.tag + s1_req := io.cpu.req.bits + } + when (wb.io.meta_req.valid) { + s1_req.phys := Bool(true) + s1_req.addr := Cat(wb.io.meta_req.bits.data.tag, wb.io.meta_req.bits.idx, UFix(0, conf.offbits)).toUFix } when (prober.io.meta_req.valid) { - r_cpu_req_idx := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, mshr.io.data_req.bits.offset)(conf.pgidxbits-1,0) + s1_req.addr := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, UFix(0, conf.offbits)).toUFix + s1_req.phys := Bool(true) } - when (mshr.io.data_req.valid) { - r_cpu_req_idx := Cat(mshr.io.data_req.bits.idx, mshr.io.data_req.bits.offset) - r_cpu_req_cmd := mshr.io.data_req.bits.cmd - r_cpu_req_type := mshr.io.data_req.bits.typ - r_amo_replay_data := mshr.io.data_req.bits.data - r_way_oh := mshr.io.data_req.bits.way_oh + when (mshr.io.replay.valid) { + s1_req := mshr.io.replay.bits } - when (flusher.io.meta_req.valid) { - r_cpu_req_idx := Cat(flusher.io.meta_req.bits.idx, mshr.io.data_req.bits.offset) - r_cpu_req_cmd := M_FLA - r_way_oh := flusher.io.meta_req.bits.way_en + val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(conf.pgidxbits-1,0)) + + when (s1_valid || s1_replay) { + s2_req.addr := s1_addr + s2_req.typ := s1_req.typ + s2_req.cmd := s1_req.cmd + s2_req.tag := s1_req.tag + when (s1_write) { + s2_req.data := Mux(s1_replay, mshr.io.replay.bits.data, io.cpu.req.bits.data) + } } - val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req.bits.data) val misaligned = - (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || - (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || - ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); + (((s1_req.typ === MT_H) || (s1_req.typ === MT_HU)) && (s1_req.addr(0) != Bits(0))) || + (((s1_req.typ === MT_W) || (s1_req.typ === MT_WU)) && (s1_req.addr(1,0) != Bits(0))) || + ((s1_req.typ === MT_D) && (s1_req.addr(2,0) != Bits(0))); - io.cpu.xcpt.ma.ld := r_cpu_req_val_ && r_req_read && misaligned - io.cpu.xcpt.ma.st := r_cpu_req_val_ && r_req_write && misaligned - io.cpu.xcpt.pf.ld := r_cpu_req_val_ && r_req_read && dtlb.io.resp.xcpt_ld - io.cpu.xcpt.pf.st := r_cpu_req_val_ && r_req_write && dtlb.io.resp.xcpt_st + io.cpu.xcpt.ma.ld := s1_read && misaligned + io.cpu.xcpt.ma.st := s1_write && misaligned + io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld + io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st // tags - val meta = new MetaDataArrayArray(lines) + val meta = new MetaDataArray val meta_arb = (new Arbiter(4)) { new MetaArrayReq() } - flusher.io.meta_req <> meta_arb.io.in(0) meta_arb.io.out <> meta.io.req // data - val data = new DataArrayArray(lines) - val data_arb = (new Arbiter(5)) { new DataArrayReq() } - data_arb.io.out <> data.io.req + val data = new DataArray + val readArb = new Arbiter(3)(new DataReadReq) + val writeArb = new Arbiter(2)(new DataWriteReq) + readArb.io.out.ready := !io.mem.xact_rep.valid || io.mem.xact_rep.ready // insert bubble if refill gets blocked + readArb.io.out <> data.io.read + writeArb.io.out <> data.io.write // cpu tag check meta_arb.io.in(3).valid := io.cpu.req.valid meta_arb.io.in(3).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) meta_arb.io.in(3).bits.rw := Bool(false) meta_arb.io.in(3).bits.way_en := Fix(-1) - val early_tag_nack = !meta_arb.io.in(3).ready - val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), dtlb.io.resp.ppn) - val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) - val tag_match_arr = (0 until conf.ways).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) - val tag_match = Cat(Bits(0),tag_match_arr:_*).orR - val tag_match_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(conf.ways-1, 0) //TODO: use Vec - val tag_hit_arr = (0 until conf.ways).map( w => conf.co.isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) - val tag_hit = Cat(Bits(0),tag_hit_arr:_*).orR - val meta_resp_way_oh = Mux(meta.io.way_en.andR, tag_match_way_oh, meta.io.way_en) - val data_resp_way_oh = Mux(data.io.way_en.andR, tag_match_way_oh, data.io.way_en) - val meta_resp_mux = Mux1H(meta_resp_way_oh, meta.io.resp) - val data_resp_mux = Mux1H(data_resp_way_oh, data.io.resp) + when (!meta_arb.io.in(3).ready) { io.cpu.req.ready := Bool(false) } + def wayMap[T <: Data](f: Int => T)(gen: => T) = Vec((0 until conf.ways).map(i => f(i))){gen} + val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)){Bits()}.toBits + val s1_hit_way = wayMap((w: Int) => s1_tag_eq_way(w) && conf.co.isHit(s1_req.cmd, meta.io.resp(w).state)){Bits()}.toBits + val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && conf.co.isValid(meta.io.resp(w).state)){Bits()}.toBits + val s1_hit = s1_hit_way.orR + val s1_clk_en = Reg(meta_arb.io.out.valid) + val s2_tag_match_way = RegEn(s1_tag_match_way, s1_clk_en) + val s2_tag_match = s2_tag_match_way.orR + val s2_hit = Reg(s1_hit) + val s2_data = wayMap((w: Int) => RegEn(data.io.resp(w), s1_clk_en && s1_tag_eq_way(w))){Bits()} + val data_resp_mux = Mux1H(s2_tag_match_way, s2_data) // writeback unit wb.io.req <> mshr.io.wb_req - wb.io.data_req <> data_arb.io.in(3) + wb.io.meta_req <> meta_arb.io.in(2) + wb.io.data_req <> readArb.io.in(1) wb.io.data_resp <> data_resp_mux wb.io.probe_rep_data <> io.mem.probe_rep_data // replacement policy val replacer = new RandomReplacement - val replaced_way_oh = Mux(flusher.io.mshr_req.valid, r_way_oh, UFixToOH(replacer.way)) - val meta_wb_mux = Mux1H(replaced_way_oh, meta.io.resp) + val s1_replaced_way_en = UFixToOH(replacer.way) + val s2_replaced_way_en = UFixToOH(RegEn(replacer.way, s1_clk_en)) + val s2_repl_state = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en && s1_replaced_way_en(w))){Bits()}) + val s2_repl_tag = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w).tag, s1_clk_en && s1_replaced_way_en(w))){Bits()}) + val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en && s1_tag_eq_way(w))){Bits()}) // refill response - data_arb.io.in(0).bits.offset := mshr.io.mem_resp_offset - data_arb.io.in(0).bits.idx := mshr.io.mem_resp_idx - data_arb.io.in(0).bits.rw := Bool(true) - data_arb.io.in(0).bits.wmask := ~UFix(0, MEM_DATA_BITS/8) - data_arb.io.in(0).bits.data := io.mem.xact_rep.bits.data - data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh - data_arb.io.in(0).valid := io.mem.xact_rep.valid && conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) + val refill = conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) + writeArb.io.in(0).valid := io.mem.xact_rep.valid && refill + io.mem.xact_rep.ready := writeArb.io.in(0).ready || !refill + writeArb.io.in(0).bits := mshr.io.mem_resp + writeArb.io.in(0).bits.wmask := Fix(-1) + writeArb.io.in(0).bits.data := io.mem.xact_rep.bits.data // load hits - data_arb.io.in(4).bits.offset := io.cpu.req.bits.addr(offsetmsb,ramindexlsb) - data_arb.io.in(4).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) - data_arb.io.in(4).bits.rw := Bool(false) - data_arb.io.in(4).valid := io.cpu.req.valid && req_read - data_arb.io.in(4).bits.way_en := Fix(-1) // intiate load on all ways, mux after tag check - val early_load_nack = !data_arb.io.in(4).ready + readArb.io.in(2).bits.addr := io.cpu.req.bits.addr + readArb.io.in(2).valid := io.cpu.req.valid + readArb.io.in(2).bits.way_en := Fix(-1) + when (!readArb.io.in(2).ready) { io.cpu.req.ready := Bool(false) } - // store hits and AMO hits and misses use a pending store register. - // we nack new stores if a pending store can't retire for some reason. - // we drain a pending store if the CPU performs a store or a - // conflictig load, or if the cache is idle, or after a miss. - val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb)) - val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) - val p_store_match = r_cpu_req_val_ && r_req_read && p_store_idx_match && p_store_offset_match - val drain_store_val = (p_store_valid && (!io.cpu.req.valid || req_write || wb.io.data_req.valid || mshr.io.data_req.valid)) || p_store_match - data_arb.io.in(2).bits.offset := p_store_idx(offsetmsb,ramindexlsb) - data_arb.io.in(2).bits.idx := p_store_idx(indexmsb,indexlsb) - data_arb.io.in(2).bits.rw := Bool(true) - data_arb.io.in(2).valid := drain_store_val - data_arb.io.in(2).bits.way_en := p_store_way_oh - val drain_store = drain_store_val && data_arb.io.in(2).ready - val p_amo = Reg(resetVal = Bool(false)) - val p_store_rdy = !(p_store_valid && !drain_store) && !(mshr.io.data_req.valid || r_replay_amo || p_amo) - p_amo := r_cpu_req_val && tag_hit && r_req_amo && mshr.io.req.ready && !nack_hit || r_replay_amo - p_store_valid := p_store_valid && !drain_store || (r_cpu_req_val && tag_hit && r_req_store && mshr.io.req.ready && !nack_hit) || p_amo + // store/amo hits + def idxMatch(dst: HellaCacheReq, src: HellaCacheReq) = dst.addr(indexmsb,indexlsb) === src.addr(indexmsb,indexlsb) + def offsetMatch(dst: HellaCacheReq, src: HellaCacheReq) = { + def mask(x: HellaCacheReq) = StoreGen(x.typ, x.addr, Bits(0)).mask + // TODO: this is overly restrictive. need write-combining buffer. + isWrite(src.cmd) && + dst.addr(indexlsb-1,offsetlsb) === src.addr(indexlsb-1,offsetlsb) && + ((mask(dst) & mask(src)).orR || isWrite(dst.cmd)) + } + def storeMatch(dst: HellaCacheReq, src: HellaCacheReq) = idxMatch(dst, src) && offsetMatch(dst, src) + val p_store_match = s2_valid && storeMatch(s1_req, s2_req) || + s3_valid && storeMatch(s1_req, s3_req) + writeArb.io.in(1).bits.addr := s3_req.addr + writeArb.io.in(1).bits.wmask := UFix(1) << s3_req.addr(conf.ramoffbits-1,offsetlsb).toUFix + writeArb.io.in(1).bits.data := Fill(MEM_DATA_BITS/conf.databits, s3_req.data) + writeArb.io.in(1).valid := s3_valid + writeArb.io.in(1).bits.way_en := s3_way // tag update after a store to an exclusive clean line. - val new_hit_state = conf.co.newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) - val set_hit_state = r_cpu_req_val && tag_hit && meta_resp_mux.state != new_hit_state + val new_hit_state = conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) meta.io.state_req.bits.rw := Bool(true) - meta.io.state_req.bits.idx := Reg(r_cpu_req_idx(indexmsb,indexlsb)) - meta.io.state_req.bits.data.state := Reg(new_hit_state) - meta.io.state_req.bits.way_en := Reg(tag_match_way_oh) - meta.io.state_req.valid := Reg(set_hit_state, resetVal = Bool(false)) + meta.io.state_req.bits.idx := s2_req.addr(indexmsb,indexlsb) + meta.io.state_req.bits.data.state := new_hit_state + meta.io.state_req.bits.way_en := s2_tag_match_way + meta.io.state_req.valid := s2_valid_masked && s2_hit && s2_hit_state != new_hit_state // pending store data, also used for AMO RHS + s3_valid := (s2_valid_masked && s2_hit || s2_replay) && isWrite(s2_req.cmd) val amoalu = new AMOALU - when (r_cpu_req_val_ && r_req_write && p_store_rdy || r_replay_amo) { - p_store_idx := r_cpu_req_idx - p_store_type := r_cpu_req_type - p_store_cmd := r_cpu_req_cmd - p_store_way_oh := Mux(r_replay_amo, r_way_oh, tag_match_way_oh) - p_store_data := cpu_req_data - } - when (p_amo) { - p_store_data := amoalu.io.out + when ((s2_valid || s2_replay) && isWrite(s2_req.cmd)) { + s3_req := s2_req + s3_req.data := amoalu.io.out + s3_way := s2_tag_match_way } // miss handling - mshr.io.req.valid := r_cpu_req_val && r_req_readwrite && !nack_hit || flusher.io.mshr_req.valid - mshr.io.req.bits.tag_miss := !tag_hit || flusher.io.mshr_req.valid - mshr.io.req.bits.old_dirty := conf.co.needsWriteback(meta_wb_mux.state) && (!tag_match || flusher.io.mshr_req.valid) // don't wb upgrades - mshr.io.req.bits.old_tag := meta_wb_mux.tag - mshr.io.req.bits.tag := cpu_req_tag - mshr.io.req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) - mshr.io.req.bits.cpu_tag := r_cpu_req_tag - mshr.io.req.bits.offset := r_cpu_req_idx(offsetmsb,0) - mshr.io.req.bits.cmd := r_cpu_req_cmd - mshr.io.req.bits.typ := r_cpu_req_type - mshr.io.req.bits.way_oh := Mux(tag_match && !flusher.io.mshr_req.valid, tag_match_way_oh, replaced_way_oh) - mshr.io.req.bits.data := cpu_req_data + mshr.io.req.valid := s2_valid_masked && !s2_hit && (isRead(s2_req.cmd) || isWrite(s2_req.cmd)) && !s2_nack_hit + mshr.io.req.bits := s2_req + mshr.io.req.bits.old_dirty := conf.co.needsWriteback(s2_repl_state) && !s2_tag_match // don't wb upgrades + mshr.io.req.bits.old_tag := s2_repl_tag + mshr.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) + mshr.io.req.bits.data := s2_req.data mshr.io.mem_rep <> io.mem.xact_rep mshr.io.mem_abort.valid := io.mem.xact_abort.valid mshr.io.mem_abort.bits := io.mem.xact_abort.bits io.mem.xact_abort.ready := Bool(true) - mshr.io.meta_req <> meta_arb.io.in(1) when (mshr.io.req.fire()) { replacer.miss } // replays - val replay = mshr.io.data_req.bits - val stall_replay = r_replay_amo || p_amo || flusher.io.meta_req.valid || p_store_valid - val replay_val = mshr.io.data_req.valid - val replay_fire = replay_val && !stall_replay - val replay_rdy = data_arb.io.in(1).ready && !stall_replay - data_arb.io.in(1).bits.offset := replay.offset(offsetmsb,ramindexlsb) - data_arb.io.in(1).bits.idx := replay.idx - data_arb.io.in(1).bits.rw := replay.cmd === M_XWR - data_arb.io.in(1).valid := replay_fire - data_arb.io.in(1).bits.way_en := mshr.io.data_req.bits.way_oh - mshr.io.data_req.ready := replay_rdy - r_replay_amo := replay_amo_val && replay_rdy + readArb.io.in(0).valid := mshr.io.replay.valid + readArb.io.in(0).bits := mshr.io.replay.bits + readArb.io.in(0).bits.way_en := Fix(-1) + mshr.io.replay.ready := Bool(true) + s1_replay := mshr.io.replay.fire() + meta_arb.io.in(0) <> mshr.io.meta_req // probes prober.io.req <> io.mem.probe_req prober.io.rep <> io.mem.probe_rep prober.io.mshr_req <> mshr.io.probe prober.io.wb_req <> wb.io.probe - prober.io.tag_match_way_oh := tag_match_way_oh - prober.io.line_state := meta_resp_mux.state - prober.io.meta_req.ready := meta_arb.io.in(2).ready && !replay_amo_val - meta_arb.io.in(2).valid := prober.io.meta_req.valid - meta_arb.io.in(2).bits := prober.io.meta_req.bits - - // store write mask generation. - // assumes store replays are higher-priority than pending stores. - val store_offset = Mux(!replay_fire, p_store_idx(offsetmsb,0), replay.offset) - val store_type = Mux(!replay_fire, p_store_type, replay.typ) - val store_wmask_wide = StoreGen(store_type, store_offset, Bits(0)).mask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2Up(conf.databytes))).toUFix - val store_data = Mux(!replay_fire, p_store_data, replay.data) - val store_data_wide = Fill(MEM_DATA_BITS/conf.databits, store_data) - data_arb.io.in(1).bits.data := store_data_wide - data_arb.io.in(1).bits.wmask := store_wmask_wide - data_arb.io.in(2).bits.data := store_data_wide - data_arb.io.in(2).bits.wmask := store_wmask_wide + prober.io.way_en := s2_tag_match_way + prober.io.line_state := s2_hit_state + prober.io.meta_req <> meta_arb.io.in(1) // load data subword mux/sign extension. // subword loads are delayed by one cycle. - val loadgen_data = data_resp_mux >> Cat(r_cpu_req_idx(log2Up(MEM_DATA_BITS/8)-1,3), Bits(0,6)) - val loadgen = LoadGen(r_cpu_req_type, r_cpu_req_idx, loadgen_data) + val loadgen_data = data_resp_mux >> Cat(s2_req.addr(log2Up(MEM_DATA_BITS/8)-1,3), Bits(0,log2Up(conf.databits))) + val loadgen = LoadGen(s2_req.typ, s2_req.addr, loadgen_data) - amoalu.io.cmd := p_store_cmd - amoalu.io.typ := p_store_type - amoalu.io.lhs := Reg(loadgen.word).toUFix - amoalu.io.rhs := p_store_data.toUFix + amoalu.io := s2_req + amoalu.io.lhs_raw := loadgen_data + amoalu.io.lhs := loadgen.word + amoalu.io.rhs := s2_req.data - early_nack := early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo + val s1_nack = p_store_match || dtlb.io.req.valid && dtlb.io.resp.miss || + idxMatch(s1_req, s2_req) && meta.io.state_req.valid || + s1_req.addr(indexmsb,indexlsb) === prober.io.meta_req.bits.idx && !prober.io.req.ready + s2_nack_hit := Reg(s1_nack) || mshr.io.secondary_miss + val s2_nack_miss = !s2_hit && !mshr.io.req.ready + val s2_nack = s2_nack_hit || s2_nack_miss + s2_valid_masked := s2_valid && !s2_nack - // we usually nack rather than reporting that the cache is not ready. - // fences and flushes are the exceptions. - val pending_fence = Reg(resetVal = Bool(false)) - pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !mshr.io.fence_rdy - nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy || - p_store_idx_match && meta.io.state_req.valid || - !r_cpu_req_phys && dtlb.io.resp.miss - val nack_miss = !mshr.io.req.ready - val nack_flush = !mshr.io.fence_rdy && (r_req_fence || r_req_flush) || - !flushed && r_req_flush - val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush + // after a nack, block until nack condition resolves (saves energy) + val block_fence = Reg(resetVal = Bool(false)) + block_fence := (s1_valid && s1_req.cmd === M_FENCE || block_fence) && !mshr.io.fence_rdy + val block_miss = Reg(resetVal = Bool(false)) + block_miss := (s2_valid || block_miss) && s2_nack_miss + when (block_fence || block_miss) { + io.cpu.req.ready := Bool(false) + } - io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence && (dtlb.io.req.ready || io.cpu.req.bits.phys) - io.cpu.resp.valid := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val - io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack - io.cpu.resp.bits.replay := mshr.io.cpu_resp_val - io.cpu.resp.bits.miss := r_cpu_req_val_ && (!tag_hit || mshr.io.secondary_miss) && r_req_read - io.cpu.resp.bits.tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag) - io.cpu.resp.bits.typ := r_cpu_req_type + val s2_read = isRead(s2_req.cmd) + io.cpu.resp.valid := s2_read && (s2_replay || s2_valid_masked && s2_hit) + io.cpu.resp.bits.nack := s2_valid && s2_nack + io.cpu.resp.bits.replay := s2_replay && s2_read + io.cpu.resp.bits.tag := s2_req.tag + io.cpu.resp.bits.typ := s2_req.typ io.cpu.resp.bits.data := loadgen.word - io.cpu.resp.bits.data_subword := Reg(loadgen.byte) + io.cpu.resp.bits.data_subword := loadgen.byte val xact_init_arb = (new Arbiter(2)) { new TransactionInit } xact_init_arb.io.in(0) <> wb.io.mem_req diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index 28fdcfad..22d18ca9 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -18,7 +18,7 @@ object Constants extends { def HAVE_RVC = false def HAVE_FPU = true - def HAVE_VEC = true + def HAVE_VEC = false val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index ac84e608..dff3590c 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -59,11 +59,10 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component req_addr := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), UFix(0,3)) } - val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false)) - when (dmem_resp_val) { - req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, UFix(0,3)).toUFix - r_resp_perm := io.mem.resp.bits.data_subword(9,4); - r_resp_ppn := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS); + when (io.mem.resp.valid) { + req_addr := Cat(io.mem.resp.bits.data(PADDR_BITS-1, PGIDX_BITS), vpn_idx, UFix(0,3)).toUFix + r_resp_perm := io.mem.resp.bits.data(9,4); + r_resp_ppn := io.mem.resp.bits.data(PADDR_BITS-1, PGIDX_BITS); } io.mem.req.valid := state === s_req @@ -76,8 +75,8 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component val resp_val = state === s_done || state === s_error val resp_err = state === s_error || state === s_wait - val resp_ptd = io.mem.resp.bits.data_subword(1,0) === Bits(1) - val resp_pte = io.mem.resp.bits.data_subword(1,0) === Bits(2) + val resp_ptd = io.mem.resp.bits.data(1,0) === Bits(1) + val resp_pte = io.mem.resp.bits.data(1,0) === Bits(2) val resp_ppns = (0 until levels-1).map(i => Cat(r_resp_ppn(PPN_BITS-1, VPN_BITS-bitsPerLevel*(i+1)), r_req_vpn(VPN_BITS-1-bitsPerLevel*(i+1), 0))) val resp_ppn = (0 until levels-1).foldRight(r_resp_ppn)((i,j) => Mux(count === UFix(i), resp_ppns(i), j)) @@ -109,7 +108,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component when (io.mem.resp.bits.nack) { state := s_req } - when (dmem_resp_val) { + when (io.mem.resp.valid) { when (resp_pte) { // page table entry state := s_done } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 2138561b..4833cd66 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -6,7 +6,8 @@ import Constants._ import uncore._ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, - icache: ICacheConfig, dcache: DCacheConfig) + icache: ICacheConfig, dcache: DCacheConfig, + fastLoadByte: Boolean = false) { val dcacheReqTagBits = 9 // enforce compliance with require() }